Browse Source

Use encoding crate in value_codec.rs.

wip
Titouan Rigoudy 4 years ago
parent
commit
a98672f282
3 changed files with 47 additions and 19 deletions
  1. +16
    -0
      Cargo.lock
  2. +1
    -0
      Cargo.toml
  3. +30
    -19
      src/proto/value_codec.rs

+ 16
- 0
Cargo.lock View File

@ -62,6 +62,11 @@ name = "cfg-if"
version = "0.1.10" version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]] [[package]]
name = "cloudabi" name = "cloudabi"
version = "0.0.3" version = "0.0.3"
@ -188,6 +193,14 @@ name = "encoding_index_tests"
version = "0.1.4" version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "encoding_rs"
version = "0.8.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cfg-if 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]] [[package]]
name = "env_logger" name = "env_logger"
version = "0.3.5" version = "0.3.5"
@ -742,6 +755,7 @@ dependencies = [
"bytes 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", "bytes 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"crossbeam-channel 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", "crossbeam-channel 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)",
"encoding 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)", "encoding 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)",
"encoding_rs 0.8.26 (registry+https://github.com/rust-lang/crates.io-index)",
"env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", "env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
"futures 0.1.29 (registry+https://github.com/rust-lang/crates.io-index)", "futures 0.1.29 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)",
@ -1106,6 +1120,7 @@ dependencies = [
"checksum bytes 0.4.12 (registry+https://github.com/rust-lang/crates.io-index)" = "206fdffcfa2df7cbe15601ef46c813fce0965eb3286db6b56c583b814b51c81c" "checksum bytes 0.4.12 (registry+https://github.com/rust-lang/crates.io-index)" = "206fdffcfa2df7cbe15601ef46c813fce0965eb3286db6b56c583b814b51c81c"
"checksum bytes 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ad1f8e949d755f9d79112b5bb46938e0ef9d3804a0b16dfab13aafcaa5f0fa72" "checksum bytes 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ad1f8e949d755f9d79112b5bb46938e0ef9d3804a0b16dfab13aafcaa5f0fa72"
"checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" "checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
"checksum cfg-if 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
"checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" "checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f"
"checksum crossbeam-channel 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "c8ec7fcd21571dc78f96cc96243cab8d8f035247c3efd16c687be154c3fa9efa" "checksum crossbeam-channel 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "c8ec7fcd21571dc78f96cc96243cab8d8f035247c3efd16c687be154c3fa9efa"
"checksum crossbeam-deque 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)" = "9f02af974daeee82218205558e51ec8768b48cf524bd01d550abe5573a608285" "checksum crossbeam-deque 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)" = "9f02af974daeee82218205558e51ec8768b48cf524bd01d550abe5573a608285"
@ -1120,6 +1135,7 @@ dependencies = [
"checksum encoding-index-singlebyte 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)" = "3351d5acffb224af9ca265f435b859c7c01537c0849754d3db3fdf2bfe2ae84a" "checksum encoding-index-singlebyte 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)" = "3351d5acffb224af9ca265f435b859c7c01537c0849754d3db3fdf2bfe2ae84a"
"checksum encoding-index-tradchinese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)" = "fd0e20d5688ce3cab59eb3ef3a2083a5c77bf496cb798dc6fcdb75f323890c18" "checksum encoding-index-tradchinese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)" = "fd0e20d5688ce3cab59eb3ef3a2083a5c77bf496cb798dc6fcdb75f323890c18"
"checksum encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "a246d82be1c9d791c5dfde9a2bd045fc3cbba3fa2b11ad558f27d01712f00569" "checksum encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "a246d82be1c9d791c5dfde9a2bd045fc3cbba3fa2b11ad558f27d01712f00569"
"checksum encoding_rs 0.8.26 (registry+https://github.com/rust-lang/crates.io-index)" = "801bbab217d7f79c0062f4f7205b5d4427c6d1a7bd7aafdd1475f7c59d62b283"
"checksum env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "15abd780e45b3ea4f76b4e9a26ff4843258dd8a3eed2775a0e7368c2e7936c2f" "checksum env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "15abd780e45b3ea4f76b4e9a26ff4843258dd8a3eed2775a0e7368c2e7936c2f"
"checksum fnv 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)" = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" "checksum fnv 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)" = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
"checksum fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" "checksum fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba"


+ 1
- 0
Cargo.toml View File

@ -9,6 +9,7 @@ byteorder = "^0.5.1"
bytes = "^1.0" bytes = "^1.0"
crossbeam-channel = "^0.3" crossbeam-channel = "^0.3"
encoding = "^0.2" encoding = "^0.2"
encoding_rs = "^0.8"
env_logger = "^0.3.2" env_logger = "^0.3.2"
futures = "^0.1" futures = "^0.1"
log = "^0.3.5" log = "^0.3.5"


+ 30
- 19
src/proto/value_codec.rs View File

@ -19,8 +19,7 @@ use std::io;
use std::net; use std::net;
use bytes::{BufMut, BytesMut}; use bytes::{BufMut, BytesMut};
use encoding::all::WINDOWS_1252;
use encoding::{DecoderTrap, EncoderTrap, Encoding};
use encoding_rs::WINDOWS_1252;
use std::convert::{TryFrom, TryInto}; use std::convert::{TryFrom, TryInto};
use thiserror::Error; use thiserror::Error;
@ -225,11 +224,11 @@ impl<'a> ValueDecoder<'a> {
let position = self.position; let position = self.position;
let bytes = self.consume(length)?; let bytes = self.consume(length)?;
let result = WINDOWS_1252.decode(bytes, DecoderTrap::Strict);
let result = WINDOWS_1252.decode_without_bom_handling_and_without_replacement(bytes);
match result { match result {
Ok(string) => Ok(string),
Err(error) => Err(ValueDecodeError::InvalidString {
cause: error.to_string(),
Some(string) => Ok(string.into_owned()),
None => Err(ValueDecodeError::InvalidString {
cause: "malformed sequence in Windows-1252-encoded string".to_string(),
position: position, position: position,
}), }),
} }
@ -361,12 +360,13 @@ impl<'a> ValueEncoder<'a> {
// Reserve space for the length prefix. // Reserve space for the length prefix.
let mut prefixer = Prefixer::new(self.buffer); let mut prefixer = Prefixer::new(self.buffer);
// Encode the string. We are quite certain this cannot fail because we
// use EncoderTrap::Replace to replace any unencodable characters with
// '?' which is always encodable in Windows-1252.
//
// TODO: Switch to the encoding_rs crate.
let bytes = WINDOWS_1252.encode(val, EncoderTrap::Replace).unwrap();
// Encode the string. This cannot fail because any unmappable characters
// are replaced.
let (bytes, encoding, _did_replace) = WINDOWS_1252.encode(val);
// Encodings in full generality can have a different "output encoding"
// but that is not the case of Windows-1252.
assert_eq!(encoding, WINDOWS_1252);
prefixer.suffix_mut().extend_from_slice(&bytes); prefixer.suffix_mut().extend_from_slice(&bytes);
@ -747,11 +747,18 @@ pub mod tests {
} }
// A few strings and their corresponding encodings. // A few strings and their corresponding encodings.
const STRING_ENCODINGS: [(&'static str, &'static [u8]); 3] = [
const STRING_ENCODINGS: [(&'static str, &'static [u8]); 4] = [
("", &[0, 0, 0, 0]), ("", &[0, 0, 0, 0]),
("hey!", &[4, 0, 0, 0, 104, 101, 121, 33]), ("hey!", &[4, 0, 0, 0, 104, 101, 121, 33]),
// Windows 1252 specific codepoints. // Windows 1252 specific codepoints.
("‘’“”€", &[5, 0, 0, 0, 145, 146, 147, 148, 128]), ("‘’“”€", &[5, 0, 0, 0, 145, 146, 147, 148, 128]),
// Undefined codepoints. They are not decoded to representable
// characters, but they do not generate errors either. In particular,
// they survive round-trips through the codec.
(
"\u{81}\u{8D}\u{8F}\u{90}\u{9D}",
&[5, 0, 0, 0, 0x81, 0x8D, 0x8F, 0x90, 0x9D],
),
]; ];
#[test] #[test]
@ -774,14 +781,18 @@ pub mod tests {
fn encode_string_with_unencodable_characters() { fn encode_string_with_unencodable_characters() {
let mut bytes = BytesMut::new(); let mut bytes = BytesMut::new();
ValueEncoder::new(&mut bytes)
.encode_string("忠犬ハチ公")
.unwrap();
ValueEncoder::new(&mut bytes).encode_string("你好").unwrap();
// Characters not in the Windows 1252 codepage are rendered as '?'.
assert_eq!(bytes, vec![5, 0, 0, 0, 63, 63, 63, 63, 63]);
// Characters not in the Windows 1252 codepage are replaced with their
// decimal representations. Thus the output is longer than the input.
assert_eq!(bytes[0..4], [16, 0, 0, 0]);
assert_eq!(&bytes[4..], b"&#20320;&#22909;");
assert_eq!(ValueDecoder::new(&bytes).decode_string().unwrap(), "?????");
// The replaced characters are not decoded back to their original values.
assert_eq!(
ValueDecoder::new(&bytes).decode_string().unwrap(),
"&#20320;&#22909;"
);
} }
#[test] #[test]


Loading…
Cancel
Save