audacia/src/ProjectSerializer.cpp

600 lines
16 KiB
C++

/**********************************************************************
Audacity: A Digital Audio Editor
Audacity(R) is copyright (c) 1999-2010 Audacity Team.
License: GPL v2. See License.txt.
ProjectSerializer.cpp
*******************************************************************//**
\class ProjectSerializer
\brief a class used to (de)serialize the project catalog
*//********************************************************************/
#include "Audacity.h"
#include "ProjectSerializer.h"
#include <algorithm>
#include <cstdint>
#include <mutex>
#include <wx/ustring.h>
///
/// ProjectSerializer class
///
// Simple "binary xml" format used exclusively for project documents.
//
// It is not intended that the user view or modify the file.
//
// It IS intended that very little work be done during auto save, so numbers
// and strings are written in their native format. They will be converted
// during recovery.
//
// The file has 3 main sections:
//
// character size 1 (UTF-8), 2 (UTF-16) or 4 (UTF-32)
// name dictionary dictionary of all names used in the document
// data fields the "encoded" XML document
//
// If a subtree is added, it will be preceded with FT_Push to tell the decoder
// to preserve the active dictionary. The decoder will then restore the
// dictionary when an FT_Pop is encountered. Nesting is unlimited.
//
// To save space, each name (attribute or element) encountered is stored in
// the name dictionary and replaced with the assigned 2-byte identifier.
//
// All strings are in native unicode format, 2-byte or 4-byte.
//
// All name "lengths" are 2-byte signed, so are limited to 32767 bytes long.
// All string/data "lengths" are 4-byte signed.
enum FieldTypes
{
FT_CharSize, // type, ID, value
FT_StartTag, // type, ID
FT_EndTag, // type, ID
FT_String, // type, ID, string length, string
FT_Int, // type, ID, value
FT_Bool, // type, ID, value
FT_Long, // type, ID, value
FT_LongLong, // type, ID, value
FT_SizeT, // type, ID, value
FT_Float, // type, ID, value, digits
FT_Double, // type, ID, value, digits
FT_Data, // type, string length, string
FT_Raw, // type, string length, string
FT_Push, // type only
FT_Pop, // type only
FT_Name // type, ID, name length, name
};
// Static so that the dict can be reused each time.
//
// If entries get added later, like when an envelope node (for example)
// is written and then the envelope is later removed, the dict will still
// contain the envelope name, but that's not a problem.
NameMap ProjectSerializer::mNames;
wxMemoryBuffer ProjectSerializer::mDict;
TranslatableString ProjectSerializer::FailureMessage( const FilePath &/*filePath*/ )
{
return
XO("This recovery file was saved by Audacity 2.3.0 or before.\n"
"You need to run that version of Audacity to recover the project." );
}
namespace {
// Aliases for the FIXED-WIDTH integer types that are used in the file
// format.
// Chosen so that among the four build types (32 bit Windows, 64
// bit Windows, 64 bit Mac clang, Linux g++) presently done (3.0.0
// development), we use the narrowest width of the type on any of them, so
// that anything saved on one build will be read back identically on all
// builds. (Although this means that very large values on some systems might
// be saved and then read back with loss.)
// In fact the only types for which this matters are long (only 32 bits on
// 32 and 64 bit Windows) and size_t (only 32 bits on 32 bit Windows).
using UShort = std::uint16_t;
using Int = std::int32_t;
using Long = std::int32_t; // To save long values
using ULong = std::uint32_t; // To save size_t values
using LongLong = std::int64_t;
// Detect this computer's endianness
bool IsLittleEndian()
{
const std::uint32_t x = 1u;
return
static_cast<const unsigned char*>(static_cast<const void*>(&x))[0];
// We will assume the same for other widths!
}
// In C++20 this could be
// constexpr bool IsLittleEndian = (std::endian::native == std::endian::little);
// static_assert( IsLittleEndian || (std::endian::native == std::endian::big),
// "Oh no! I'm mixed-endian!" );
// Functions that can read and write native integer types to a canonicalized
// little-endian file format. (We don't bother to do the same for floating
// point numbers.)
// Write native little-endian to little-endian file format
template< typename Number >
void WriteLittleEndian( wxMemoryBuffer &out, Number value )
{
out.AppendData( &value, sizeof(value) );
}
// Write native big-endian to little-endian file format
template< typename Number >
void WriteBigEndian( wxMemoryBuffer &out, Number value )
{
auto begin = static_cast<unsigned char*>( static_cast<void*>( &value ) );
std::reverse( begin, begin + sizeof( value ) );
out.AppendData( &value, sizeof(value) );
}
// Read little-endian file format to native little-endian
template< typename Number >
Number ReadLittleEndian( wxMemoryInputStream &in )
{
Number result;
in.Read( &result, sizeof(result) );
return result;
}
// Read little-endian file format to native big-endian
template< typename Number >
Number ReadBigEndian( wxMemoryInputStream &in )
{
Number result;
in.Read( &result, sizeof(result) );
auto begin = static_cast<unsigned char*>( static_cast<void*>( &result ) );
std::reverse( begin, begin + sizeof( result ) );
return result;
}
// Choose between implementations!
static const auto WriteUShort = IsLittleEndian()
? &WriteLittleEndian<UShort> : &WriteBigEndian<UShort>;
static const auto WriteInt = IsLittleEndian()
? &WriteLittleEndian<Int> : &WriteBigEndian<Int>;
static const auto WriteLong = IsLittleEndian()
? &WriteLittleEndian<Long> : &WriteBigEndian<Long>;
static const auto WriteULong = IsLittleEndian()
? &WriteLittleEndian<ULong> : &WriteBigEndian<ULong>;
static const auto WriteLongLong = IsLittleEndian()
? &WriteLittleEndian<LongLong> : &WriteBigEndian<LongLong>;
static const auto ReadUShort = IsLittleEndian()
? &ReadLittleEndian<UShort> : &ReadBigEndian<UShort>;
static const auto ReadInt = IsLittleEndian()
? &ReadLittleEndian<Int> : &ReadBigEndian<Int>;
static const auto ReadLong = IsLittleEndian()
? &ReadLittleEndian<Long> : &ReadBigEndian<Long>;
static const auto ReadULong = IsLittleEndian()
? &ReadLittleEndian<ULong> : &ReadBigEndian<ULong>;
static const auto ReadLongLong = IsLittleEndian()
? &ReadLittleEndian<LongLong> : &ReadBigEndian<LongLong>;
// Functions to read and write certain lengths -- maybe we will change
// our choices for widths or signedness?
using Length = Int; // Instead, as wide as size_t?
static const auto WriteLength = WriteInt;
static const auto ReadLength = ReadInt;
using Digits = Int; // Instead, just an unsigned char?
static const auto WriteDigits = WriteInt;
static const auto ReadDigits = ReadInt;
}
ProjectSerializer::ProjectSerializer(size_t allocSize)
{
mDict.SetBufSize(allocSize);
mBuffer.SetBufSize(allocSize);
static std::once_flag flag;
std::call_once(flag, []{
// Just once per run, store header information in the unique static
// dictionary that will be written into each project that is saved.
// Store the size of "wxChar" so we can convert during recovery in
// case the file is used on a system with a different character size.
char size = sizeof(wxChar);
mDict.AppendByte(FT_CharSize);
mDict.AppendData(&size, 1);
});
mDictChanged = false;
}
ProjectSerializer::~ProjectSerializer()
{
}
void ProjectSerializer::StartTag(const wxString & name)
{
mBuffer.AppendByte(FT_StartTag);
WriteName(name);
}
void ProjectSerializer::EndTag(const wxString & name)
{
mBuffer.AppendByte(FT_EndTag);
WriteName(name);
}
void ProjectSerializer::WriteAttr(const wxString & name, const wxChar *value)
{
WriteAttr(name, wxString(value));
}
void ProjectSerializer::WriteAttr(const wxString & name, const wxString & value)
{
mBuffer.AppendByte(FT_String);
WriteName(name);
const Length len = value.length() * sizeof(wxChar);
WriteLength( mBuffer, len );
mBuffer.AppendData(value.wx_str(), len);
}
void ProjectSerializer::WriteAttr(const wxString & name, int value)
{
mBuffer.AppendByte(FT_Int);
WriteName(name);
WriteInt( mBuffer, value );
}
void ProjectSerializer::WriteAttr(const wxString & name, bool value)
{
mBuffer.AppendByte(FT_Bool);
WriteName(name);
mBuffer.AppendByte(value);
}
void ProjectSerializer::WriteAttr(const wxString & name, long value)
{
mBuffer.AppendByte(FT_Long);
WriteName(name);
WriteLong( mBuffer, value );
}
void ProjectSerializer::WriteAttr(const wxString & name, long long value)
{
mBuffer.AppendByte(FT_LongLong);
WriteName(name);
WriteLongLong( mBuffer, value );
}
void ProjectSerializer::WriteAttr(const wxString & name, size_t value)
{
mBuffer.AppendByte(FT_SizeT);
WriteName(name);
WriteULong( mBuffer, value );
}
void ProjectSerializer::WriteAttr(const wxString & name, float value, int digits)
{
mBuffer.AppendByte(FT_Float);
WriteName(name);
mBuffer.AppendData(&value, sizeof(value));
WriteDigits( mBuffer, digits );
}
void ProjectSerializer::WriteAttr(const wxString & name, double value, int digits)
{
mBuffer.AppendByte(FT_Double);
WriteName(name);
mBuffer.AppendData(&value, sizeof(value));
WriteDigits( mBuffer, digits );
}
void ProjectSerializer::WriteData(const wxString & value)
{
mBuffer.AppendByte(FT_Data);
Length len = value.length() * sizeof(wxChar);
WriteLength( mBuffer, len );
mBuffer.AppendData(value.wx_str(), len);
}
void ProjectSerializer::Write(const wxString & value)
{
mBuffer.AppendByte(FT_Raw);
Length len = value.length() * sizeof(wxChar);
WriteLength( mBuffer, len );
mBuffer.AppendData(value.wx_str(), len);
}
void ProjectSerializer::WriteSubTree(const ProjectSerializer & value)
{
mBuffer.AppendByte(FT_Push);
mBuffer.AppendData(value.mDict.GetData(), value.mDict.GetDataLen());
mBuffer.AppendData(value.mBuffer.GetData(), value.mBuffer.GetDataLen());
mBuffer.AppendByte(FT_Pop);
}
void ProjectSerializer::WriteName(const wxString & name)
{
wxASSERT(name.length() * sizeof(wxChar) <= SHRT_MAX);
UShort id;
auto nameiter = mNames.find(name);
if (nameiter != mNames.end())
{
id = nameiter->second;
}
else
{
// mNames is static. This appends each name to static mDict only once
// in each run.
UShort len = name.length() * sizeof(wxChar);
id = mNames.size();
mNames[name] = id;
mDict.AppendByte(FT_Name);
WriteUShort( mDict, id );
WriteUShort( mDict, len );
mDict.AppendData(name.wx_str(), len);
mDictChanged = true;
}
WriteUShort( mBuffer, id );
}
const wxMemoryBuffer &ProjectSerializer::GetDict() const
{
return mDict;
}
const wxMemoryBuffer &ProjectSerializer::GetData() const
{
return mBuffer;
}
bool ProjectSerializer::IsEmpty() const
{
return mBuffer.GetDataLen() == 0;
}
bool ProjectSerializer::DictChanged() const
{
return mDictChanged;
}
// See ProjectFileIO::LoadProject() for explanation of the blockids arg
wxString ProjectSerializer::Decode(const wxMemoryBuffer &buffer)
{
wxMemoryInputStream in(buffer.GetData(), buffer.GetDataLen());
XMLStringWriter out;
std::vector<char> bytes;
IdMap mIds;
std::vector<IdMap> mIdStack;
char mCharSize = 0;
mIds.clear();
struct Error{}; // exception type for short-range try/catch
auto Lookup = [&mIds]( UShort id ) -> const wxString &
{
auto iter = mIds.find( id );
if (iter == mIds.end())
{
throw Error{};
}
return iter->second;
};
auto ReadString = [&mCharSize, &in, &bytes](int len) -> wxString
{
bytes.reserve( len + 4 );
auto data = bytes.data();
in.Read( data, len );
// Make a null terminator of the widest type
memset( data + len, '\0', 4 );
wxUString str;
switch (mCharSize)
{
case 1:
str.assignFromUTF8(data, len);
break;
case 2:
str.assignFromUTF16((wxChar16 *) data, len / 2);
break;
case 4:
str = wxU32CharBuffer::CreateNonOwned((wxChar32 *) data, len / 4);
break;
default:
wxASSERT_MSG(false, wxT("Characters size not 1, 2, or 4"));
break;
}
return str;
};
try
{
while (!in.Eof())
{
UShort id;
switch (in.GetC())
{
case FT_Push:
{
mIdStack.push_back(mIds);
mIds.clear();
}
break;
case FT_Pop:
{
mIds = mIdStack.back();
mIdStack.pop_back();
}
break;
case FT_Name:
{
id = ReadUShort( in );
auto len = ReadUShort( in );
mIds[id] = ReadString(len);
}
break;
case FT_StartTag:
{
id = ReadUShort( in );
out.StartTag(Lookup(id));
}
break;
case FT_EndTag:
{
id = ReadUShort( in );
out.EndTag(Lookup(id));
}
break;
case FT_String:
{
id = ReadUShort( in );
int len = ReadLength( in );
out.WriteAttr(Lookup(id), ReadString(len));
}
break;
case FT_Float:
{
float val;
id = ReadUShort( in );
in.Read(&val, sizeof(val));
int dig = ReadDigits( in );
out.WriteAttr(Lookup(id), val, dig);
}
break;
case FT_Double:
{
double val;
id = ReadUShort( in );
in.Read(&val, sizeof(val));
int dig = ReadDigits( in );
out.WriteAttr(Lookup(id), val, dig);
}
break;
case FT_Int:
{
id = ReadUShort( in );
int val = ReadInt( in );
out.WriteAttr(Lookup(id), val);
}
break;
case FT_Bool:
{
unsigned char val;
id = ReadUShort( in );
in.Read(&val, 1);
out.WriteAttr(Lookup(id), val);
}
break;
case FT_Long:
{
id = ReadUShort( in );
long val = ReadLong( in );
out.WriteAttr(Lookup(id), val);
}
break;
case FT_LongLong:
{
id = ReadUShort( in );
long long val = ReadLongLong( in );
out.WriteAttr(Lookup(id), val);
}
break;
case FT_SizeT:
{
id = ReadUShort( in );
size_t val = ReadULong( in );
out.WriteAttr(Lookup(id), val);
}
break;
case FT_Data:
{
int len = ReadLength( in );
out.WriteData(ReadString(len));
}
break;
case FT_Raw:
{
int len = ReadLength( in );
out.Write(ReadString(len));
}
break;
case FT_CharSize:
{
in.Read(&mCharSize, 1);
}
break;
default:
wxASSERT(true);
break;
}
}
}
catch( const Error& )
{
// Document was corrupt, or platform differences in size or endianness
// were not well canonicalized
return {};
}
return out;
}