age-core: Defer Base64 decoding entirely

The new `AgeStanza::body` method replaces the previous `body` property,
enabling a wrapping parser to defer Base64 decoding until the end.
This commit is contained in:
Jack Grigg 2021-08-08 00:23:04 +01:00
parent 6017c23523
commit 0fe89b9aec
2 changed files with 67 additions and 58 deletions

View file

@ -9,6 +9,9 @@ to 1.0.0 are beta releases.
## [Unreleased]
### Changed
- MSRV is now 1.51.0.
- The `body` property of `age_core::format::AgeStanza` has been replaced by the
`AgeStanza::body` method, to enable enclosing parsers to defer Base64 decoding
until the very end.
## [0.6.0] - 2021-05-02
### Security

View file

@ -36,7 +36,30 @@ pub struct AgeStanza<'a> {
/// Zero or more arguments.
pub args: Vec<&'a str>,
/// The body of the stanza, containing a wrapped [`FileKey`].
pub body: Vec<u8>,
///
/// Represented as the set of Base64-encoded lines for efficiency (so the caller can
/// defer the cost of decoding until the structure containing this stanza has been
/// fully-parsed).
body: Vec<&'a [u8]>,
}
impl<'a> AgeStanza<'a> {
/// Decodes and returns the body of this stanza.
pub fn body(&self) -> Vec<u8> {
// An AgeStanza will always contain at least one chunk.
let (partial_chunk, full_chunks) = self.body.split_last().unwrap();
// This is faster than collecting from a flattened iterator.
let mut data = vec![0; full_chunks.len() * 64 + partial_chunk.len()];
for (i, chunk) in full_chunks.iter().enumerate() {
// These chunks are guaranteed to be full by construction.
data[i * 64..(i + 1) * 64].copy_from_slice(chunk);
}
data[full_chunks.len() * 64..].copy_from_slice(partial_chunk);
// The chunks are guaranteed to contain Base64 characters by construction.
base64::decode_config(&data, base64::STANDARD_NO_PAD).unwrap()
}
}
/// A section of the age header that encapsulates the file key as encrypted to a specific
@ -55,10 +78,11 @@ pub struct Stanza {
impl From<AgeStanza<'_>> for Stanza {
fn from(stanza: AgeStanza<'_>) -> Self {
let body = stanza.body();
Stanza {
tag: stanza.tag.to_string(),
args: stanza.args.into_iter().map(|s| s.to_string()).collect(),
body: stanza.body,
body,
}
}
}
@ -105,9 +129,9 @@ pub fn grease_the_joint() -> Stanza {
pub mod read {
use nom::{
branch::alt,
bytes::streaming::{tag, take, take_while1, take_while_m_n},
bytes::streaming::{tag, take_while1, take_while_m_n},
character::streaming::newline,
combinator::{map, map_opt, opt, verify},
combinator::{map, map_opt, opt},
multi::{many_till, separated_list1},
sequence::{pair, preceded, terminated},
IResult,
@ -115,6 +139,15 @@ pub mod read {
use super::{AgeStanza, STANZA_TAG};
fn is_base64_char(c: u8) -> bool {
// Check against the ASCII values of the standard Base64 character set.
match c {
// A..=Z | a..=z | 0..=9 | + | /
65..=90 | 97..=122 | 48..=57 | 43 | 47 => true,
_ => false,
}
}
/// From the age specification:
/// ```text
/// ... an arbitrary string is a sequence of ASCII characters with values 33 to 126.
@ -126,63 +159,36 @@ pub mod read {
})(input)
}
/// Returns the slice of input up to (but not including) the first LF
/// character, if that slice is entirely Base64 characters
///
/// # Errors
///
/// - Returns Failure on an empty slice.
/// - Returns Incomplete(1) if a LF is not found.
fn take_b64_line1(input: &[u8]) -> IResult<&[u8], &[u8]> {
verify(take_while1(|c| c != b'\n'), |bytes: &[u8]| {
// STANDARD_NO_PAD only differs from STANDARD during serialization; the base64
// crate always allows padding during parsing. We require canonical
// serialization, so we explicitly reject padding characters here.
base64::decode_config(bytes, base64::STANDARD_NO_PAD).is_ok() && !bytes.contains(&b'=')
})(input)
}
fn concatenate_chunks(full_chunks: &[&[u8]], partial_chunk: &[u8]) -> Vec<u8> {
// This is faster than collecting from a flattened iterator.
let mut data = vec![0; full_chunks.len() * 64 + partial_chunk.len()];
for (i, chunk) in full_chunks.iter().enumerate() {
data[i * 64..(i + 1) * 64].copy_from_slice(chunk);
}
data[full_chunks.len() * 64..].copy_from_slice(partial_chunk);
data
}
fn wrapped_encoded_data(input: &[u8]) -> IResult<&[u8], Vec<u8>> {
map_opt(
fn wrapped_encoded_data(input: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
map(
many_till(
// Any body lines before the last MUST be full-length.
terminated(take(64usize), newline),
terminated(take_while_m_n(64, 64, is_base64_char), newline),
// Last body line MUST be short (empty if necessary).
terminated(take_while_m_n(0, 63, |c| c != b'\n'), newline),
terminated(take_while_m_n(0, 63, is_base64_char), newline),
),
|(full_chunks, partial_chunk)| {
let data = concatenate_chunks(&full_chunks, partial_chunk);
if data.contains(&b'=') {
None
} else {
base64::decode_config(&data, base64::STANDARD_NO_PAD).ok()
}
|(full_chunks, partial_chunk): (Vec<&[u8]>, &[u8])| {
let mut chunks = full_chunks;
chunks.push(partial_chunk);
chunks
},
)(input)
}
fn legacy_wrapped_encoded_data(input: &[u8]) -> IResult<&[u8], Vec<u8>> {
map_opt(separated_list1(newline, take_b64_line1), |chunks| {
// Enforce that the only chunk allowed to be shorter than 64 characters
// is the last chunk.
let (partial_chunk, full_chunks) = chunks.split_last().unwrap();
if full_chunks.iter().any(|s| s.len() != 64) || partial_chunk.len() > 64 {
None
} else {
let data = concatenate_chunks(full_chunks, partial_chunk);
base64::decode_config(&data, base64::STANDARD_NO_PAD).ok()
}
})(input)
fn legacy_wrapped_encoded_data(input: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
map_opt(
separated_list1(newline, take_while1(is_base64_char)),
|chunks: Vec<&[u8]>| {
// Enforce that the only chunk allowed to be shorter than 64 characters
// is the last chunk.
let (partial_chunk, full_chunks) = chunks.split_last().unwrap();
if full_chunks.iter().any(|s| s.len() != 64) || partial_chunk.len() > 64 {
None
} else {
Some(chunks)
}
},
)(input)
}
/// Reads an age stanza.
@ -222,7 +228,7 @@ pub mod read {
AgeStanza {
tag,
args,
body: body.unwrap_or_default(),
body: body.unwrap_or_else(|| vec![&[]]),
}
},
)(input)
@ -341,7 +347,7 @@ C3ZAeY64NXS4QFrksLm3EGz+uPRyI0eQsWw7LWbbYig
let (_, stanza) = read::age_stanza(test_stanza.as_bytes()).unwrap();
assert_eq!(stanza.tag, test_tag);
assert_eq!(stanza.args, test_args);
assert_eq!(stanza.body, test_body);
assert_eq!(stanza.body(), test_body);
let mut buf = vec![];
cookie_factory::gen_simple(write::age_stanza(test_tag, test_args, &test_body), &mut buf)
@ -363,7 +369,7 @@ C3ZAeY64NXS4QFrksLm3EGz+uPRyI0eQsWw7LWbbYig
let (_, stanza) = read::age_stanza(test_stanza.as_bytes()).unwrap();
assert_eq!(stanza.tag, test_tag);
assert_eq!(stanza.args, test_args);
assert_eq!(stanza.body, test_body);
assert_eq!(stanza.body(), test_body);
let mut buf = vec![];
cookie_factory::gen_simple(write::age_stanza(test_tag, test_args, test_body), &mut buf)
@ -390,7 +396,7 @@ xD7o4VEOu1t7KZQ1gDgq2FPzBEeSRqbnqvQEXdLRYy143BxR6oFxsUUJCRB0ErXA
let (_, stanza) = read::age_stanza(test_stanza.as_bytes()).unwrap();
assert_eq!(stanza.tag, test_tag);
assert_eq!(stanza.args, test_args);
assert_eq!(stanza.body, test_body);
assert_eq!(stanza.body(), test_body);
let mut buf = vec![];
cookie_factory::gen_simple(write::age_stanza(test_tag, test_args, &test_body), &mut buf)
@ -421,6 +427,6 @@ xD7o4VEOu1t7KZQ1gDgq2FPzBEeSRqbnqvQEXdLRYy143BxR6oFxsUUJCRB0ErXA
let (_, stanza) = read::legacy_age_stanza(test_stanza.as_bytes()).unwrap();
assert_eq!(stanza.tag, test_tag);
assert_eq!(stanza.args, test_args);
assert_eq!(stanza.body, test_body);
assert_eq!(stanza.body(), test_body);
}
}