Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 96 additions & 1 deletion src/odr/internal/oldms/word/io.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
#include <odr/internal/oldms/word/io.hpp>

#include "odr/internal/util/string_util.hpp"

#include <odr/internal/util/byte_stream_util.hpp>
#include <odr/internal/util/stream_util.hpp>
#include <odr/internal/util/string_util.hpp>

namespace odr::internal::oldms {

Expand Down Expand Up @@ -190,4 +192,97 @@ void oldms::skip_Prc(std::istream &in) {
in.ignore(cbGrpprl);
}

/// Reads `size` bytes of "compressed" (8-bit) text from `in` and returns it
/// as UTF-8.
///
/// Each byte is first offered to `uncompress_char`; when that yields a
/// replacement, the replacement code point is appended. Every other byte
/// stands for the Unicode code point of the same value, so bytes above 0x7F
/// are appended through `append_c32` instead of being copied verbatim —
/// copying them raw would leave invalid UTF-8 in the result.
///
/// @param in    stream positioned at the first byte of the text
/// @param size  number of bytes to consume
/// @return the decoded text
/// @throws std::runtime_error if the stream ends before `size` bytes were
///         read, or if `get()` yields a value outside 0..0xFF
std::string oldms::read_string_compressed(std::istream &in,
                                          const std::size_t size) {
  static constexpr auto eof = std::istream::traits_type::eof();

  std::string result;
  // Lower bound only: bytes that expand to multi-byte sequences grow past it.
  result.reserve(size);

  for (std::size_t i = 0; i < size; ++i) {
    const auto ci = in.get();
    if (ci == eof) {
      throw std::runtime_error("Unexpected end of input");
    }
    if (ci < 0 || ci > 0xFF) {
      throw std::runtime_error("Unexpected input: " + std::to_string(ci));
    }
    const char c = static_cast<char>(ci);
    if (const std::optional<char16_t> uncompressed = uncompress_char(c);
        uncompressed.has_value()) {
      util::string::append_c32(*uncompressed, result);
    } else if (ci > 0x7F) {
      // No table entry: the byte value itself is the code point
      // (U+0080..U+00FF), which must not be emitted as a lone byte.
      util::string::append_c32(static_cast<char32_t>(ci), result);
    } else {
      // Plain ASCII is copied through unchanged.
      result.push_back(c);
    }
  }

  return result;
}

/// Reads `size` 16-bit code units (`size * sizeof(char16_t)` bytes) from `in`.
///
/// The bytes are copied straight into the result's storage, so each code
/// unit is interpreted in host byte order.
/// NOTE(review): the on-disk format is presumably little-endian — confirm
/// before relying on this on a big-endian host.
///
/// @param in    stream positioned at the first byte of the text
/// @param size  number of 16-bit code units to read
/// @return the code units that were read
/// @throws std::runtime_error if fewer than `size` code units could be read
std::u16string oldms::read_string_uncompressed(std::istream &in,
                                               const std::size_t size) {
  std::u16string result(size, u'\0');

  const auto byte_count =
      static_cast<std::streamsize>(size * sizeof(char16_t));
  in.read(reinterpret_cast<char *>(result.data()), byte_count);

  // A short read would otherwise hand back a silently zero-padded string;
  // fail the same way read_string_compressed does on premature end of input.
  if (in.gcount() != byte_count) {
    throw std::runtime_error("Unexpected end of input");
  }

  return result;
}

/// Looks up the 16-bit replacement for a single byte of compressed text.
///
/// Only some bytes in the range 0x80..0x9F have a replacement; for every
/// other byte the function returns `std::nullopt`.
///
/// @param c  the byte to look up
/// @return the replacement code unit, or `std::nullopt` if `c` has none
std::optional<char16_t> oldms::uncompress_char(const char c) {
  constexpr unsigned first_mapped = 0x80;
  constexpr unsigned last_mapped = 0x9F;

  // Indexed by (byte - 0x80); a zero entry means "no replacement".
  static constexpr char16_t replacements[last_mapped - first_mapped + 1] = {
      0x0000, 0x0000, 0x201A, 0x0192, // 0x80 - 0x83
      0x201E, 0x2026, 0x2020, 0x2021, // 0x84 - 0x87
      0x02C6, 0x2030, 0x0160, 0x2039, // 0x88 - 0x8B
      0x0152, 0x0000, 0x0000, 0x0000, // 0x8C - 0x8F
      0x0000, 0x2018, 0x2019, 0x201C, // 0x90 - 0x93
      0x201D, 0x2022, 0x2013, 0x2014, // 0x94 - 0x97
      0x02DC, 0x2122, 0x0161, 0x203A, // 0x98 - 0x9B
      0x0153, 0x0000, 0x0000, 0x0178, // 0x9C - 0x9F
  };

  // Go through unsigned char so the index is well-defined whether plain
  // `char` is signed or unsigned.
  const unsigned byte = static_cast<unsigned char>(c);
  if (byte < first_mapped || byte > last_mapped) {
    return std::nullopt;
  }

  const char16_t replacement = replacements[byte - first_mapped];
  if (replacement == 0) {
    return std::nullopt;
  }
  return replacement;
}

} // namespace odr::internal::oldms
6 changes: 6 additions & 0 deletions src/odr/internal/oldms/word/io.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <functional>
#include <iosfwd>
#include <memory>
#include <optional>

namespace odr::internal::oldms {

Expand All @@ -30,4 +31,9 @@ void read_Clx(std::istream &in, const HandlePrc &handle_Prc,
const HandlePcdt &handle_Pcdt);
void skip_Prc(std::istream &in);

/// Reads `size` bytes of 8-bit text from `in`; bytes for which
/// `uncompress_char` has a replacement are substituted by it.
std::string read_string_compressed(std::istream &in, std::size_t size);
/// Reads `size` 16-bit code units (`size * sizeof(char16_t)` bytes) from `in`.
std::u16string read_string_uncompressed(std::istream &in, std::size_t size);

/// Returns the 16-bit replacement for the byte `c`, or `std::nullopt` when
/// `c` has none.
std::optional<char16_t> uncompress_char(char c);

} // namespace odr::internal::oldms
5 changes: 4 additions & 1 deletion src/odr/internal/util/string_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
#include <algorithm>
#include <cstdint>
#include <iomanip>
#include <locale>
#include <sstream>

#include <utf8cpp/utf8/cpp17.h>
Expand Down Expand Up @@ -85,4 +84,8 @@ std::string string::c16str_to_string(const char16_t *c16str,
return u16string_to_string(std::u16string(c16str, length / 2));
}

/// Appends the code point `c` to `string` by forwarding to utf8cpp's
/// `utf8::append`.
void string::append_c32(const char32_t c, std::string &string) {
utf8::append(c, string);
}

} // namespace odr::internal::util
1 change: 1 addition & 0 deletions src/odr/internal/util/string_util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,6 @@ std::string to_string(double d, int precision);
std::string u16string_to_string(const std::u16string &string);
std::u16string string_to_u16string(const std::string &string);
std::string c16str_to_string(const char16_t *c16str, std::size_t length);
/// Appends the code point `c` to `string` (forwards to `utf8::append`).
void append_c32(char32_t c, std::string &string);

} // namespace odr::internal::util::string
4 changes: 2 additions & 2 deletions test/src/internal/oldms/oldms_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@ TEST(OldMs, test) {

const auto document_stream = files.open("/WordDocument").stream();
document_stream->seekg(first_text_offset);
const std::string first_text =
internal::util::stream::read(*document_stream, first_text_length);
const std::string first_text = internal::oldms::read_string_compressed(
*document_stream, first_text_length);
std::cout << "first_text " << first_text << std::endl;
});
}