// Mirror of https://github.com/ZLMediaKit/ZLMediaKit.git
// (synced 2024-11-25 20:27:34 +08:00; 1980 lines, 54 KiB, C++)
// Copyright 2007-2011 Baptiste Lepilleur
|
|
// Distributed under MIT license, or public domain if desired and
|
|
// recognized in your jurisdiction.
|
|
// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
|
|
|
|
#if !defined(JSON_IS_AMALGAMATION)
|
|
#include "assertions.h"
|
|
#include "reader.h"
|
|
#include "value.h"
|
|
#include "json_tool.h"
|
|
#endif // if !defined(JSON_IS_AMALGAMATION)
|
|
#include <utility>
|
|
#include <cstdio>
|
|
#include <cassert>
|
|
#include <cstring>
|
|
#include <istream>
|
|
#include <sstream>
|
|
#include <memory>
|
|
#include <set>
|
|
|
|
#if defined(_MSC_VER) && _MSC_VER < 1500 // VC++ 8.0 and below
|
|
#define snprintf _snprintf
|
|
#endif
|
|
|
|
#if defined(_MSC_VER) && _MSC_VER >= 1400 // VC++ 8.0
|
|
// Disable warning about strdup being deprecated.
|
|
#pragma warning(disable : 4996)
|
|
#endif
|
|
|
|
// Maximum nesting depth Reader::readValue() accepts before it reports
// "Exceeded stackLimit".
static const int stackLimit_g = 1000;
// Current nesting depth of Reader::readValue(). This is file-scope state that
// Reader::parse() resets to zero before each parse.
static int stackDepth_g = 0;
|
|
|
|
namespace Json {
|
|
|
|
#if __cplusplus >= 201103L
|
|
typedef std::unique_ptr<CharReader> CharReaderPtr;
|
|
#else
|
|
typedef std::auto_ptr<CharReader> CharReaderPtr;
|
|
#endif
|
|
|
|
// Implementation of class Features
|
|
// ////////////////////////////////
|
|
|
|
// Default feature set: comments are accepted, every other relaxation is off.
Features::Features()
    : allowComments_(true),
      strictRoot_(false),
      allowDroppedNullPlaceholders_(false),
      allowNumericKeys_(false) {}
|
|
|
|
// Returns a default-constructed feature set.
Features Features::all() {
  Features defaults;
  return defaults;
}
|
|
|
|
// Returns a feature set with comments, dropped null placeholders and numeric
// keys all disabled, and with the root required to be an array or an object.
Features Features::strictMode() {
  Features strict;
  strict.strictRoot_ = true;
  strict.allowComments_ = false;
  strict.allowNumericKeys_ = false;
  strict.allowDroppedNullPlaceholders_ = false;
  return strict;
}
|
|
|
|
// Implementation of class Reader
|
|
// ////////////////////////////////
|
|
|
|
// Returns true when the range [begin, end) holds a '\n' or '\r' character.
static bool containsNewLine(Reader::Location begin, Reader::Location end) {
  while (begin < end) {
    const Reader::Char ch = *begin;
    if (ch == '\n' || ch == '\r')
      return true;
    ++begin;
  }
  return false;
}
|
|
|
|
// Class Reader
|
|
// //////////////////////////////////////////////////////////////////
|
|
|
|
// Builds a reader that uses Features::all(); every other listed member is
// value-initialized.
Reader::Reader()
    : errors_(),
      document_(),
      begin_(),
      end_(),
      current_(),
      lastValueEnd_(),
      lastValue_(),
      commentsBefore_(),
      features_(Features::all()),
      collectComments_() {}
|
|
|
|
// Builds a reader that uses the supplied feature set; every other listed
// member is value-initialized.
Reader::Reader(const Features& features)
    : errors_(),
      document_(),
      begin_(),
      end_(),
      current_(),
      lastValueEnd_(),
      lastValue_(),
      commentsBefore_(),
      features_(features),
      collectComments_() {}
|
|
|
|
bool
|
|
Reader::parse(const std::string& document, Value& root, bool collectComments) {
|
|
document_ = document;
|
|
const char* begin = document_.c_str();
|
|
const char* end = begin + document_.length();
|
|
return parse(begin, end, root, collectComments);
|
|
}
|
|
|
|
bool Reader::parse(std::istream& sin, Value& root, bool collectComments) {
|
|
// std::istream_iterator<char> begin(sin);
|
|
// std::istream_iterator<char> end;
|
|
// Those would allow streamed input from a file, if parse() were a
|
|
// template function.
|
|
|
|
// Since std::string is reference-counted, this at least does not
|
|
// create an extra copy.
|
|
std::string doc;
|
|
std::getline(sin, doc, (char)EOF);
|
|
return parse(doc, root, collectComments);
|
|
}
|
|
|
|
bool Reader::parse(const char* beginDoc,
|
|
const char* endDoc,
|
|
Value& root,
|
|
bool collectComments) {
|
|
if (!features_.allowComments_) {
|
|
collectComments = false;
|
|
}
|
|
|
|
begin_ = beginDoc;
|
|
end_ = endDoc;
|
|
collectComments_ = collectComments;
|
|
current_ = begin_;
|
|
lastValueEnd_ = 0;
|
|
lastValue_ = 0;
|
|
commentsBefore_ = "";
|
|
errors_.clear();
|
|
while (!nodes_.empty())
|
|
nodes_.pop();
|
|
nodes_.push(&root);
|
|
|
|
stackDepth_g = 0; // Yes, this is bad coding, but options are limited.
|
|
bool successful = readValue();
|
|
Token token;
|
|
skipCommentTokens(token);
|
|
if (collectComments_ && !commentsBefore_.empty())
|
|
root.setComment(commentsBefore_, commentAfter);
|
|
if (features_.strictRoot_) {
|
|
if (!root.isArray() && !root.isObject()) {
|
|
// Set error location to start of doc, ideally should be first token found
|
|
// in doc
|
|
token.type_ = tokenError;
|
|
token.start_ = beginDoc;
|
|
token.end_ = endDoc;
|
|
addError(
|
|
"A valid JSON document must be either an array or an object value.",
|
|
token);
|
|
return false;
|
|
}
|
|
}
|
|
return successful;
|
|
}
|
|
|
|
bool Reader::readValue() {
|
|
// This is a non-reentrant way to support a stackLimit. Terrible!
|
|
// But this deprecated class has a security problem: Bad input can
|
|
// cause a seg-fault. This seems like a fair, binary-compatible way
|
|
// to prevent the problem.
|
|
if (stackDepth_g >= stackLimit_g) throwRuntimeError("Exceeded stackLimit in readValue().");
|
|
++stackDepth_g;
|
|
|
|
Token token;
|
|
skipCommentTokens(token);
|
|
bool successful = true;
|
|
|
|
if (collectComments_ && !commentsBefore_.empty()) {
|
|
currentValue().setComment(commentsBefore_, commentBefore);
|
|
commentsBefore_ = "";
|
|
}
|
|
|
|
switch (token.type_) {
|
|
case tokenObjectBegin:
|
|
successful = readObject(token);
|
|
currentValue().setOffsetLimit(current_ - begin_);
|
|
break;
|
|
case tokenArrayBegin:
|
|
successful = readArray(token);
|
|
currentValue().setOffsetLimit(current_ - begin_);
|
|
break;
|
|
case tokenNumber:
|
|
successful = decodeNumber(token);
|
|
break;
|
|
case tokenString:
|
|
successful = decodeString(token);
|
|
break;
|
|
case tokenTrue:
|
|
{
|
|
Value v(true);
|
|
currentValue().swapPayload(v);
|
|
currentValue().setOffsetStart(token.start_ - begin_);
|
|
currentValue().setOffsetLimit(token.end_ - begin_);
|
|
}
|
|
break;
|
|
case tokenFalse:
|
|
{
|
|
Value v(false);
|
|
currentValue().swapPayload(v);
|
|
currentValue().setOffsetStart(token.start_ - begin_);
|
|
currentValue().setOffsetLimit(token.end_ - begin_);
|
|
}
|
|
break;
|
|
case tokenNull:
|
|
{
|
|
Value v;
|
|
currentValue().swapPayload(v);
|
|
currentValue().setOffsetStart(token.start_ - begin_);
|
|
currentValue().setOffsetLimit(token.end_ - begin_);
|
|
}
|
|
break;
|
|
case tokenArraySeparator:
|
|
case tokenObjectEnd:
|
|
case tokenArrayEnd:
|
|
if (features_.allowDroppedNullPlaceholders_) {
|
|
// "Un-read" the current token and mark the current value as a null
|
|
// token.
|
|
current_--;
|
|
Value v;
|
|
currentValue().swapPayload(v);
|
|
currentValue().setOffsetStart(current_ - begin_ - 1);
|
|
currentValue().setOffsetLimit(current_ - begin_);
|
|
break;
|
|
} // Else, fall through...
|
|
default:
|
|
currentValue().setOffsetStart(token.start_ - begin_);
|
|
currentValue().setOffsetLimit(token.end_ - begin_);
|
|
return addError("Syntax error: value, object or array expected.", token);
|
|
}
|
|
|
|
if (collectComments_) {
|
|
lastValueEnd_ = current_;
|
|
lastValue_ = ¤tValue();
|
|
}
|
|
|
|
--stackDepth_g;
|
|
return successful;
|
|
}
|
|
|
|
// Reads the next token into `token`. When comments are allowed, comment tokens
// are skipped so that `token` ends up holding the first non-comment token.
void Reader::skipCommentTokens(Token& token) {
  readToken(token);
  if (!features_.allowComments_)
    return;
  while (token.type_ == tokenComment)
    readToken(token);
}
|
|
|
|
// Skips leading whitespace and scans one token starting at current_.
//
// token.start_/token.end_ are set to the scanned range and token.type_ to the
// token kind, or to tokenError when the text could not be scanned. A zero
// character (which is also what getNextChar() yields at end of input) is
// reported as tokenEndOfStream. Always returns true.
bool Reader::readToken(Token& token) {
  skipSpaces();
  token.start_ = current_;

  const Char c = getNextChar();
  TokenType kind = tokenError;
  bool scanned = true;

  switch (c) {
  case '{':
    kind = tokenObjectBegin;
    break;
  case '}':
    kind = tokenObjectEnd;
    break;
  case '[':
    kind = tokenArrayBegin;
    break;
  case ']':
    kind = tokenArrayEnd;
    break;
  case ',':
    kind = tokenArraySeparator;
    break;
  case ':':
    kind = tokenMemberSeparator;
    break;
  case 0:
    kind = tokenEndOfStream;
    break;
  case '"':
    kind = tokenString;
    scanned = readString();
    break;
  case '/':
    kind = tokenComment;
    scanned = readComment();
    break;
  case '-':
  case '0': case '1': case '2': case '3': case '4':
  case '5': case '6': case '7': case '8': case '9':
    kind = tokenNumber;
    readNumber();
    break;
  case 't':
    kind = tokenTrue;
    scanned = match("rue", 3);
    break;
  case 'f':
    kind = tokenFalse;
    scanned = match("alse", 4);
    break;
  case 'n':
    kind = tokenNull;
    scanned = match("ull", 3);
    break;
  default:
    scanned = false;
    break;
  }

  token.type_ = scanned ? kind : tokenError;
  token.end_ = current_;
  return true;
}
|
|
|
|
void Reader::skipSpaces() {
|
|
while (current_ != end_) {
|
|
Char c = *current_;
|
|
if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
|
|
++current_;
|
|
else
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Checks whether the next patternLength characters at current_ equal
// `pattern`. On a match the cursor is moved past them and true is returned;
// otherwise the cursor is left untouched and false is returned.
bool Reader::match(Location pattern, int patternLength) {
  const bool enoughInput = !(end_ - current_ < patternLength);
  if (!enoughInput)
    return false;
  for (int i = patternLength - 1; i >= 0; --i) {
    if (current_[i] != pattern[i])
      return false;
  }
  current_ += patternLength;
  return true;
}
|
|
|
|
bool Reader::readComment() {
|
|
Location commentBegin = current_ - 1;
|
|
Char c = getNextChar();
|
|
bool successful = false;
|
|
if (c == '*')
|
|
successful = readCStyleComment();
|
|
else if (c == '/')
|
|
successful = readCppStyleComment();
|
|
if (!successful)
|
|
return false;
|
|
|
|
if (collectComments_) {
|
|
CommentPlacement placement = commentBefore;
|
|
if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
|
|
if (c != '*' || !containsNewLine(commentBegin, current_))
|
|
placement = commentAfterOnSameLine;
|
|
}
|
|
|
|
addComment(commentBegin, current_, placement);
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// Returns a copy of [begin, end) in which every "\r\n" pair and every lone
// '\r' has been replaced by a single '\n'.
static std::string normalizeEOL(Reader::Location begin, Reader::Location end) {
  std::string result;
  result.reserve(end - begin);
  for (Reader::Location it = begin; it != end;) {
    const char ch = *it;
    ++it;
    if (ch != '\r') {
      result += ch;
      continue;
    }
    // A '\n' directly after the '\r' belongs to the same line break.
    if (it != end && *it == '\n')
      ++it;
    result += '\n';
  }
  return result;
}
|
|
|
|
void
|
|
Reader::addComment(Location begin, Location end, CommentPlacement placement) {
|
|
assert(collectComments_);
|
|
const std::string& normalized = normalizeEOL(begin, end);
|
|
if (placement == commentAfterOnSameLine) {
|
|
assert(lastValue_ != 0);
|
|
lastValue_->setComment(normalized, placement);
|
|
} else {
|
|
commentsBefore_ += normalized;
|
|
}
|
|
}
|
|
|
|
bool Reader::readCStyleComment() {
|
|
while (current_ != end_) {
|
|
Char c = getNextChar();
|
|
if (c == '*' && *current_ == '/')
|
|
break;
|
|
}
|
|
return getNextChar() == '/';
|
|
}
|
|
|
|
bool Reader::readCppStyleComment() {
|
|
while (current_ != end_) {
|
|
Char c = getNextChar();
|
|
if (c == '\n')
|
|
break;
|
|
if (c == '\r') {
|
|
// Consume DOS EOL. It will be normalized in addComment.
|
|
if (current_ != end_ && *current_ == '\n')
|
|
getNextChar();
|
|
// Break on Moc OS 9 EOL.
|
|
break;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
void Reader::readNumber() {
|
|
const char *p = current_;
|
|
char c = '0'; // stopgap for already consumed character
|
|
// integral part
|
|
while (c >= '0' && c <= '9')
|
|
c = (current_ = p) < end_ ? *p++ : 0;
|
|
// fractional part
|
|
if (c == '.') {
|
|
c = (current_ = p) < end_ ? *p++ : 0;
|
|
while (c >= '0' && c <= '9')
|
|
c = (current_ = p) < end_ ? *p++ : 0;
|
|
}
|
|
// exponential part
|
|
if (c == 'e' || c == 'E') {
|
|
c = (current_ = p) < end_ ? *p++ : 0;
|
|
if (c == '+' || c == '-')
|
|
c = (current_ = p) < end_ ? *p++ : 0;
|
|
while (c >= '0' && c <= '9')
|
|
c = (current_ = p) < end_ ? *p++ : 0;
|
|
}
|
|
}
|
|
|
|
bool Reader::readString() {
|
|
Char c = 0;
|
|
while (current_ != end_) {
|
|
c = getNextChar();
|
|
if (c == '\\')
|
|
getNextChar();
|
|
else if (c == '"')
|
|
break;
|
|
}
|
|
return c == '"';
|
|
}
|
|
|
|
// Reads the members of an object whose opening '{' is `tokenStart`.
//
// currentValue() is replaced by an empty object, then "name : value" members
// are read until the closing '}'. Member names are strings, or numbers when
// allowNumericKeys_ is set. On any error the reader skips ahead to the next
// '}' (or to the end of input) and false is returned.
bool Reader::readObject(Token& tokenStart) {
  Token tokenName;
  std::string name;
  Value emptyObject(objectValue);
  currentValue().swapPayload(emptyObject);
  currentValue().setOffsetStart(tokenStart.start_ - begin_);

  for (;;) {
    if (!readToken(tokenName))
      break;

    // Skip comments in front of the member name.
    bool nameTokenOk = true;
    while (tokenName.type_ == tokenComment && nameTokenOk)
      nameTokenOk = readToken(tokenName);
    if (!nameTokenOk)
      break;

    // A '}' is accepted here only while the previous name is empty, which is
    // the case for an empty object.
    if (tokenName.type_ == tokenObjectEnd && name.empty())
      return true;

    name = "";
    if (tokenName.type_ == tokenString) {
      if (!decodeString(tokenName, name))
        return recoverFromError(tokenObjectEnd);
    } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
      Value numericKey;
      if (!decodeNumber(tokenName, numericKey))
        return recoverFromError(tokenObjectEnd);
      name = numericKey.asString();
    } else {
      break;
    }

    Token colon;
    if (!readToken(colon) || colon.type_ != tokenMemberSeparator)
      return addErrorAndRecover(
          "Missing ':' after object member name", colon, tokenObjectEnd);

    // Parse the member value with the new member as the current node.
    Value& member = currentValue()[name];
    nodes_.push(&member);
    const bool valueOk = readValue();
    nodes_.pop();
    if (!valueOk) // error already set
      return recoverFromError(tokenObjectEnd);

    Token comma;
    const bool gotToken = readToken(comma);
    const bool acceptable = comma.type_ == tokenObjectEnd ||
                            comma.type_ == tokenArraySeparator ||
                            comma.type_ == tokenComment;
    if (!gotToken || !acceptable)
      return addErrorAndRecover(
          "Missing ',' or '}' in object declaration", comma, tokenObjectEnd);

    // Skip comments that follow the member value.
    bool separatorOk = true;
    while (comma.type_ == tokenComment && separatorOk)
      separatorOk = readToken(comma);
    if (comma.type_ == tokenObjectEnd)
      return true;
  }

  return addErrorAndRecover(
      "Missing '}' or object member name", tokenName, tokenObjectEnd);
}
|
|
|
|
// Reads the elements of an array whose opening '[' is `tokenStart`.
//
// currentValue() is replaced by an empty array, then comma-separated values
// are read until the closing ']'. Comments are accepted after each element.
// On any error the reader skips ahead to the next ']' (or to the end of
// input) and false is returned.
bool Reader::readArray(Token& tokenStart) {
  Value init(arrayValue);
  currentValue().swapPayload(init);
  currentValue().setOffsetStart(tokenStart.start_ - begin_);
  skipSpaces();
  // Check for the end of input before peeking: skipSpaces() may have stopped
  // at end_, and the previous code dereferenced current_ regardless.
  if (current_ != end_ && *current_ == ']') // empty array
  {
    Token endArray;
    readToken(endArray);
    return true;
  }
  int index = 0;
  for (;;) {
    // Parse the element with the new array slot as the current node.
    Value& value = currentValue()[index++];
    nodes_.push(&value);
    bool ok = readValue();
    nodes_.pop();
    if (!ok) // error already set
      return recoverFromError(tokenArrayEnd);

    Token token;
    // Accept Comment after last item in the array.
    ok = readToken(token);
    while (token.type_ == tokenComment && ok) {
      ok = readToken(token);
    }
    bool badTokenType =
        (token.type_ != tokenArraySeparator && token.type_ != tokenArrayEnd);
    if (!ok || badTokenType) {
      return addErrorAndRecover(
          "Missing ',' or ']' in array declaration", token, tokenArrayEnd);
    }
    if (token.type_ == tokenArrayEnd)
      break;
  }
  return true;
}
|
|
|
|
// Decodes a number token into currentValue() and records the token's offsets
// on it. Returns false, leaving currentValue() untouched, when decoding fails.
bool Reader::decodeNumber(Token& token) {
  Value number;
  const bool ok = decodeNumber(token, number);
  if (ok) {
    Value& target = currentValue();
    target.swapPayload(number);
    target.setOffsetStart(token.start_ - begin_);
    target.setOffsetLimit(token.end_ - begin_);
  }
  return ok;
}
|
|
|
|
// Decodes a number token into `decoded`.
//
// The token is first read as an integer. As soon as a non-digit character is
// met, or the magnitude would exceed the largest supported integer, decoding
// is handed over to decodeDouble() and its result is returned.
bool Reader::decodeNumber(Token& token, Value& decoded) {
  Location cursor = token.start_;
  const bool negative = (*cursor == '-');
  if (negative)
    ++cursor;

  // Largest magnitude representable for this sign.
  const Value::LargestUInt limit =
      negative ? Value::LargestUInt(Value::maxLargestInt) + 1
               : Value::maxLargestUInt;
  const Value::LargestUInt threshold = limit / 10;
  const Value::LargestUInt lastDigitLimit = limit % 10;

  Value::LargestUInt magnitude = 0;
  while (cursor < token.end_) {
    const Char c = *cursor++;
    if (c < '0' || c > '9')
      return decodeDouble(token, decoded);
    const Value::UInt digit(c - '0');
    // At or past limit/10, one more digit only fits when the magnitude sits
    // exactly on the threshold, this is the final digit, and the digit does
    // not exceed limit % 10. Anything else is decoded as a double instead.
    const bool nearLimit = magnitude >= threshold;
    if (nearLimit && (magnitude > threshold || cursor != token.end_ ||
                      digit > lastDigitLimit))
      return decodeDouble(token, decoded);
    magnitude = magnitude * 10 + digit;
  }

  if (negative) {
    if (magnitude == limit)
      decoded = Value::minLargestInt;
    else
      decoded = -Value::LargestInt(magnitude);
  } else if (magnitude <= Value::LargestUInt(Value::maxInt)) {
    decoded = Value::LargestInt(magnitude);
  } else {
    decoded = magnitude;
  }
  return true;
}
|
|
|
|
// Decodes a number token as a double into currentValue() and records the
// token's offsets on it. Returns false, leaving currentValue() untouched, when
// decoding fails.
bool Reader::decodeDouble(Token& token) {
  Value number;
  const bool ok = decodeDouble(token, number);
  if (ok) {
    Value& target = currentValue();
    target.swapPayload(number);
    target.setOffsetStart(token.start_ - begin_);
    target.setOffsetLimit(token.end_ - begin_);
  }
  return ok;
}
|
|
|
|
// Parses the token text as a double through a std::istringstream and stores
// it in `decoded`. Records an "is not a number" error and returns false when
// the extraction fails.
bool Reader::decodeDouble(Token& token, Value& decoded) {
  const std::string text(token.start_, token.end_);
  std::istringstream stream(text);
  double parsed = 0;
  if (!(stream >> parsed))
    return addError("'" + text + "' is not a number.", token);
  decoded = parsed;
  return true;
}
|
|
|
|
// Decodes a string token into currentValue() and records the token's offsets
// on it. Returns false, leaving currentValue() untouched, when decoding fails.
bool Reader::decodeString(Token& token) {
  std::string text;
  if (!decodeString(token, text))
    return false;
  Value decoded(text);
  Value& target = currentValue();
  target.swapPayload(decoded);
  target.setOffsetStart(token.start_ - begin_);
  target.setOffsetLimit(token.end_ - begin_);
  return true;
}
|
|
|
|
// Appends the unescaped contents of a quoted string token to `decoded`.
//
// The first and last characters of the token are taken to be the quotes and
// are not copied. The escapes \" \/ \\ \b \f \n \r \t and \uXXXX are
// translated; any other escape, or a backslash at the very end, records an
// error and returns false.
bool Reader::decodeString(Token& token, std::string& decoded) {
  decoded.reserve(token.end_ - token.start_ - 2);
  Location cursor = token.start_ + 1;   // first character after the quote
  const Location stop = token.end_ - 1; // position of the closing quote

  while (cursor != stop) {
    const Char c = *cursor++;
    if (c == '"')
      break;
    if (c != '\\') {
      decoded += c;
      continue;
    }

    if (cursor == stop)
      return addError("Empty escape sequence in string", token, cursor);
    const Char escape = *cursor++;
    switch (escape) {
    case '"':
    case '/':
    case '\\':
      decoded += escape; // these escapes stand for themselves
      break;
    case 'b':
      decoded += '\b';
      break;
    case 'f':
      decoded += '\f';
      break;
    case 'n':
      decoded += '\n';
      break;
    case 'r':
      decoded += '\r';
      break;
    case 't':
      decoded += '\t';
      break;
    case 'u': {
      unsigned int codePoint;
      if (!decodeUnicodeCodePoint(token, cursor, stop, codePoint))
        return false;
      decoded += codePointToUTF8(codePoint);
      break;
    }
    default:
      return addError("Bad escape sequence in string", token, cursor);
    }
  }
  return true;
}
|
|
|
|
// Decodes the hex digits of a \uXXXX escape at `current` into `unicode`.
//
// When the value lies in 0xD800-0xDBFF it is treated as the first half of a
// surrogate pair: a second "\uXXXX" escape must follow, and the two halves are
// combined into one code point. `current` is advanced past everything that
// was consumed. Returns false after recording an error on malformed input.
bool Reader::decodeUnicodeCodePoint(Token& token,
                                    Location& current,
                                    Location end,
                                    unsigned int& unicode) {
  if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
    return false;

  const bool isLeadSurrogate = unicode >= 0xD800 && unicode <= 0xDBFF;
  if (!isLeadSurrogate)
    return true;

  // surrogate pairs
  if (end - current < 6)
    return addError(
        "additional six characters expected to parse unicode surrogate pair.",
        token,
        current);

  // Both increments are intentional: `current` moves past each character
  // that was inspected, even when the check then fails.
  const bool hasSecondEscape = *(current++) == '\\' && *(current++) == 'u';
  if (!hasSecondEscape)
    return addError("expecting another \\u token to begin the second half of "
                    "a unicode surrogate pair",
                    token,
                    current);

  unsigned int surrogatePair;
  if (!decodeUnicodeEscapeSequence(token, current, end, surrogatePair))
    return false;
  unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
  return true;
}
|
|
|
|
// Reads exactly four hexadecimal digits at `current` into `unicode`, most
// significant digit first, advancing `current` past them. Records an error and
// returns false when fewer than four characters remain or a character is not
// a hex digit.
bool Reader::decodeUnicodeEscapeSequence(Token& token,
                                         Location& current,
                                         Location end,
                                         unsigned int& unicode) {
  if (end - current < 4)
    return addError(
        "Bad unicode escape sequence in string: four digits expected.",
        token,
        current);

  unicode = 0;
  for (int remaining = 4; remaining > 0; --remaining) {
    const Char c = *current++;
    unicode *= 16; // make room for the next hex digit
    const bool isDecimal = c >= '0' && c <= '9';
    const bool isLowerHex = c >= 'a' && c <= 'f';
    const bool isUpperHex = c >= 'A' && c <= 'F';
    if (isDecimal) {
      unicode += c - '0';
    } else if (isLowerHex) {
      unicode += c - 'a' + 10;
    } else if (isUpperHex) {
      unicode += c - 'A' + 10;
    } else {
      return addError(
          "Bad unicode escape sequence in string: hexadecimal digit expected.",
          token,
          current);
    }
  }
  return true;
}
|
|
|
|
bool
|
|
Reader::addError(const std::string& message, Token& token, Location extra) {
|
|
ErrorInfo info;
|
|
info.token_ = token;
|
|
info.message_ = message;
|
|
info.extra_ = extra;
|
|
errors_.push_back(info);
|
|
return false;
|
|
}
|
|
|
|
// Skips tokens until one of type `skipUntilToken`, or the end of the stream,
// has been read. Any errors added while skipping are discarded by trimming
// errors_ back to its size on entry. Always returns false.
bool Reader::recoverFromError(TokenType skipUntilToken) {
  const int errorCount = int(errors_.size());
  Token skip;
  do {
    if (!readToken(skip))
      errors_.resize(errorCount); // discard errors caused by recovery
  } while (skip.type_ != skipUntilToken && skip.type_ != tokenEndOfStream);
  errors_.resize(errorCount);
  return false;
}
|
|
|
|
bool Reader::addErrorAndRecover(const std::string& message,
|
|
Token& token,
|
|
TokenType skipUntilToken) {
|
|
addError(message, token);
|
|
return recoverFromError(skipUntilToken);
|
|
}
|
|
|
|
// Returns the value on top of the node stack, i.e. the one being filled in.
Value& Reader::currentValue() {
  Value* const top = nodes_.top();
  return *top;
}
|
|
|
|
// Returns the character at current_ and advances past it, or returns 0
// without advancing once end_ has been reached.
Reader::Char Reader::getNextChar() {
  if (current_ != end_) {
    const Char next = *current_;
    ++current_;
    return next;
  }
  return 0;
}
|
|
|
|
// Computes the 1-based line and column of `location` within the document.
//
// The text from begin_ up to `location` (or end_, whichever comes first) is
// scanned, counting "\r\n", lone '\r' and '\n' as line breaks. The column is
// the distance from the start of the last line seen, plus one.
//
// location position inside the document to translate.
// line     receives the 1-based line number.
// column   receives the 1-based column number.
void Reader::getLocationLineAndColumn(Location location,
                                      int& line,
                                      int& column) const {
  Location current = begin_;
  Location lastLineStart = current;
  line = 0;
  while (current < location && current != end_) {
    Char c = *current++;
    if (c == '\r') {
      // Only peek for the '\n' of a DOS line ending if a character is left;
      // the previous code dereferenced `current` even when the '\r' was the
      // last character of the document.
      if (current != end_ && *current == '\n')
        ++current;
      lastLineStart = current;
      ++line;
    } else if (c == '\n') {
      lastLineStart = current;
      ++line;
    }
  }
  // column & line start at 1
  column = int(location - lastLineStart) + 1;
  ++line;
}
|
|
|
|
// Formats the position of `location` as "Line <n>, Column <m>", using the
// three-argument overload to obtain the numbers.
std::string Reader::getLocationLineAndColumn(Location location) const {
  int line;
  int column;
  getLocationLineAndColumn(location, line, column);

  // Large enough for the fixed text plus two formatted ints.
  char text[18 + 16 + 16 + 1];
#if defined(_MSC_VER) && defined(__STDC_SECURE_LIB__)
#if defined(WINCE)
  _snprintf(text, sizeof(text), "Line %d, Column %d", line, column);
#else
  sprintf_s(text, sizeof(text), "Line %d, Column %d", line, column);
#endif
#else
  snprintf(text, sizeof(text), "Line %d, Column %d", line, column);
#endif
  return std::string(text);
}
|
|
|
|
// Deprecated. Preserved for backward compatibility
|
|
std::string Reader::getFormatedErrorMessages() const {
|
|
return getFormattedErrorMessages();
|
|
}
|
|
|
|
std::string Reader::getFormattedErrorMessages() const {
|
|
std::string formattedMessage;
|
|
for (Errors::const_iterator itError = errors_.begin();
|
|
itError != errors_.end();
|
|
++itError) {
|
|
const ErrorInfo& error = *itError;
|
|
formattedMessage +=
|
|
"* " + getLocationLineAndColumn(error.token_.start_) + "\n";
|
|
formattedMessage += " " + error.message_ + "\n";
|
|
if (error.extra_)
|
|
formattedMessage +=
|
|
"See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
|
|
}
|
|
return formattedMessage;
|
|
}
|
|
|
|
std::vector<Reader::StructuredError> Reader::getStructuredErrors() const {
|
|
std::vector<Reader::StructuredError> allErrors;
|
|
for (Errors::const_iterator itError = errors_.begin();
|
|
itError != errors_.end();
|
|
++itError) {
|
|
const ErrorInfo& error = *itError;
|
|
Reader::StructuredError structured;
|
|
structured.offset_start = error.token_.start_ - begin_;
|
|
structured.offset_limit = error.token_.end_ - begin_;
|
|
structured.message = error.message_;
|
|
allErrors.push_back(structured);
|
|
}
|
|
return allErrors;
|
|
}
|
|
|
|
// Records a caller-supplied error against the document range that `value`
// was parsed from.
//
// value   supplies the start/limit offsets of the range to blame.
// message error text to record.
// Returns false, recording nothing, when either offset lies beyond the
// length of the last parsed document; true otherwise.
bool Reader::pushError(const Value& value, const std::string& message) {
  size_t length = end_ - begin_;
  if (value.getOffsetStart() > length || value.getOffsetLimit() > length)
    return false;
  Token token;
  token.type_ = tokenError;
  token.start_ = begin_ + value.getOffsetStart();
  // Offsets are relative to begin_. The previous code added the limit to
  // end_, which produced a pointer past the end of the document.
  token.end_ = begin_ + value.getOffsetLimit();
  ErrorInfo info;
  info.token_ = token;
  info.message_ = message;
  info.extra_ = 0;
  errors_.push_back(info);
  return true;
}
|
|
|
|
bool Reader::pushError(const Value& value, const std::string& message, const Value& extra) {
|
|
size_t length = end_ - begin_;
|
|
if(value.getOffsetStart() > length
|
|
|| value.getOffsetLimit() > length
|
|
|| extra.getOffsetLimit() > length)
|
|
return false;
|
|
Token token;
|
|
token.type_ = tokenError;
|
|
token.start_ = begin_ + value.getOffsetStart();
|
|
token.end_ = begin_ + value.getOffsetLimit();
|
|
ErrorInfo info;
|
|
info.token_ = token;
|
|
info.message_ = message;
|
|
info.extra_ = begin_ + extra.getOffsetStart();
|
|
errors_.push_back(info);
|
|
return true;
|
|
}
|
|
|
|
bool Reader::good() const {
|
|
return !errors_.size();
|
|
}
|
|
|
|
// exact copy of Features
|
|
// Feature switches consumed by OurReader. Extends the legacy Features set
// with single-quote strings, trailing-content checking, duplicate-key
// rejection and a configurable nesting limit.
class OurFeatures {
public:
  // Returns a default-constructed feature set.
  static OurFeatures all();
  OurFeatures();
  bool allowComments_;
  bool strictRoot_;
  bool allowDroppedNullPlaceholders_;
  bool allowNumericKeys_;
  bool allowSingleQuotes_;
  bool failIfExtra_;
  bool rejectDupKeys_;
  int stackLimit_; // maximum nesting depth accepted by OurReader::readValue()
}; // OurFeatures

// Implementation of class OurFeatures
// ////////////////////////////////

// Default feature set: comments allowed, every other relaxation and check
// off. rejectDupKeys_ and stackLimit_ used to be left uninitialized, so a
// default-constructed object carried indeterminate values; they now default
// to false and to 1000 (the same depth limit the legacy Reader uses).
OurFeatures::OurFeatures()
    : allowComments_(true), strictRoot_(false)
    , allowDroppedNullPlaceholders_(false), allowNumericKeys_(false)
    , allowSingleQuotes_(false)
    , failIfExtra_(false)
    , rejectDupKeys_(false)
    , stackLimit_(1000)
{
}

OurFeatures OurFeatures::all() { return OurFeatures(); }
|
|
|
|
// Implementation of class Reader
|
|
// ////////////////////////////////
|
|
|
|
// exact copy of Reader, renamed to OurReader
|
|
class OurReader {
|
|
public:
|
|
typedef char Char;
|
|
typedef const Char* Location;
|
|
struct StructuredError {
|
|
size_t offset_start;
|
|
size_t offset_limit;
|
|
std::string message;
|
|
};
|
|
|
|
OurReader(OurFeatures const& features);
|
|
bool parse(const char* beginDoc,
|
|
const char* endDoc,
|
|
Value& root,
|
|
bool collectComments = true);
|
|
std::string getFormattedErrorMessages() const;
|
|
std::vector<StructuredError> getStructuredErrors() const;
|
|
bool pushError(const Value& value, const std::string& message);
|
|
bool pushError(const Value& value, const std::string& message, const Value& extra);
|
|
bool good() const;
|
|
|
|
private:
|
|
OurReader(OurReader const&); // no impl
|
|
void operator=(OurReader const&); // no impl
|
|
|
|
enum TokenType {
|
|
tokenEndOfStream = 0,
|
|
tokenObjectBegin,
|
|
tokenObjectEnd,
|
|
tokenArrayBegin,
|
|
tokenArrayEnd,
|
|
tokenString,
|
|
tokenNumber,
|
|
tokenTrue,
|
|
tokenFalse,
|
|
tokenNull,
|
|
tokenArraySeparator,
|
|
tokenMemberSeparator,
|
|
tokenComment,
|
|
tokenError
|
|
};
|
|
|
|
class Token {
|
|
public:
|
|
TokenType type_;
|
|
Location start_;
|
|
Location end_;
|
|
};
|
|
|
|
class ErrorInfo {
|
|
public:
|
|
Token token_;
|
|
std::string message_;
|
|
Location extra_;
|
|
};
|
|
|
|
typedef std::deque<ErrorInfo> Errors;
|
|
|
|
bool readToken(Token& token);
|
|
void skipSpaces();
|
|
bool match(Location pattern, int patternLength);
|
|
bool readComment();
|
|
bool readCStyleComment();
|
|
bool readCppStyleComment();
|
|
bool readString();
|
|
bool readStringSingleQuote();
|
|
void readNumber();
|
|
bool readValue();
|
|
bool readObject(Token& token);
|
|
bool readArray(Token& token);
|
|
bool decodeNumber(Token& token);
|
|
bool decodeNumber(Token& token, Value& decoded);
|
|
bool decodeString(Token& token);
|
|
bool decodeString(Token& token, std::string& decoded);
|
|
bool decodeDouble(Token& token);
|
|
bool decodeDouble(Token& token, Value& decoded);
|
|
bool decodeUnicodeCodePoint(Token& token,
|
|
Location& current,
|
|
Location end,
|
|
unsigned int& unicode);
|
|
bool decodeUnicodeEscapeSequence(Token& token,
|
|
Location& current,
|
|
Location end,
|
|
unsigned int& unicode);
|
|
bool addError(const std::string& message, Token& token, Location extra = 0);
|
|
bool recoverFromError(TokenType skipUntilToken);
|
|
bool addErrorAndRecover(const std::string& message,
|
|
Token& token,
|
|
TokenType skipUntilToken);
|
|
void skipUntilSpace();
|
|
Value& currentValue();
|
|
Char getNextChar();
|
|
void
|
|
getLocationLineAndColumn(Location location, int& line, int& column) const;
|
|
std::string getLocationLineAndColumn(Location location) const;
|
|
void addComment(Location begin, Location end, CommentPlacement placement);
|
|
void skipCommentTokens(Token& token);
|
|
|
|
typedef std::stack<Value*> Nodes;
|
|
Nodes nodes_;
|
|
Errors errors_;
|
|
std::string document_;
|
|
Location begin_;
|
|
Location end_;
|
|
Location current_;
|
|
Location lastValueEnd_;
|
|
Value* lastValue_;
|
|
std::string commentsBefore_;
|
|
int stackDepth_;
|
|
|
|
OurFeatures const features_;
|
|
bool collectComments_;
|
|
}; // OurReader
|
|
|
|
// complete copy of the Reader implementation, adapted for OurReader
|
|
|
|
// Builds a reader that uses the supplied feature set; every other listed
// member is value-initialized. stackDepth_ is now initialized as well (it
// used to hold an indeterminate value until the first parse() call).
OurReader::OurReader(OurFeatures const& features)
    : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
      lastValue_(), commentsBefore_(), stackDepth_(0), features_(features),
      collectComments_() {
}
|
|
|
|
bool OurReader::parse(const char* beginDoc,
|
|
const char* endDoc,
|
|
Value& root,
|
|
bool collectComments) {
|
|
if (!features_.allowComments_) {
|
|
collectComments = false;
|
|
}
|
|
|
|
begin_ = beginDoc;
|
|
end_ = endDoc;
|
|
collectComments_ = collectComments;
|
|
current_ = begin_;
|
|
lastValueEnd_ = 0;
|
|
lastValue_ = 0;
|
|
commentsBefore_ = "";
|
|
errors_.clear();
|
|
while (!nodes_.empty())
|
|
nodes_.pop();
|
|
nodes_.push(&root);
|
|
|
|
stackDepth_ = 0;
|
|
bool successful = readValue();
|
|
Token token;
|
|
skipCommentTokens(token);
|
|
if (features_.failIfExtra_) {
|
|
if (token.type_ != tokenError && token.type_ != tokenEndOfStream) {
|
|
addError("Extra non-whitespace after JSON value.", token);
|
|
return false;
|
|
}
|
|
}
|
|
if (collectComments_ && !commentsBefore_.empty())
|
|
root.setComment(commentsBefore_, commentAfter);
|
|
if (features_.strictRoot_) {
|
|
if (!root.isArray() && !root.isObject()) {
|
|
// Set error location to start of doc, ideally should be first token found
|
|
// in doc
|
|
token.type_ = tokenError;
|
|
token.start_ = beginDoc;
|
|
token.end_ = endDoc;
|
|
addError(
|
|
"A valid JSON document must be either an array or an object value.",
|
|
token);
|
|
return false;
|
|
}
|
|
}
|
|
return successful;
|
|
}
|
|
|
|
bool OurReader::readValue() {
|
|
if (stackDepth_ >= features_.stackLimit_) throwRuntimeError("Exceeded stackLimit in readValue().");
|
|
++stackDepth_;
|
|
Token token;
|
|
skipCommentTokens(token);
|
|
bool successful = true;
|
|
|
|
if (collectComments_ && !commentsBefore_.empty()) {
|
|
currentValue().setComment(commentsBefore_, commentBefore);
|
|
commentsBefore_ = "";
|
|
}
|
|
|
|
switch (token.type_) {
|
|
case tokenObjectBegin:
|
|
successful = readObject(token);
|
|
currentValue().setOffsetLimit(current_ - begin_);
|
|
break;
|
|
case tokenArrayBegin:
|
|
successful = readArray(token);
|
|
currentValue().setOffsetLimit(current_ - begin_);
|
|
break;
|
|
case tokenNumber:
|
|
successful = decodeNumber(token);
|
|
break;
|
|
case tokenString:
|
|
successful = decodeString(token);
|
|
break;
|
|
case tokenTrue:
|
|
{
|
|
Value v(true);
|
|
currentValue().swapPayload(v);
|
|
currentValue().setOffsetStart(token.start_ - begin_);
|
|
currentValue().setOffsetLimit(token.end_ - begin_);
|
|
}
|
|
break;
|
|
case tokenFalse:
|
|
{
|
|
Value v(false);
|
|
currentValue().swapPayload(v);
|
|
currentValue().setOffsetStart(token.start_ - begin_);
|
|
currentValue().setOffsetLimit(token.end_ - begin_);
|
|
}
|
|
break;
|
|
case tokenNull:
|
|
{
|
|
Value v;
|
|
currentValue().swapPayload(v);
|
|
currentValue().setOffsetStart(token.start_ - begin_);
|
|
currentValue().setOffsetLimit(token.end_ - begin_);
|
|
}
|
|
break;
|
|
case tokenArraySeparator:
|
|
case tokenObjectEnd:
|
|
case tokenArrayEnd:
|
|
if (features_.allowDroppedNullPlaceholders_) {
|
|
// "Un-read" the current token and mark the current value as a null
|
|
// token.
|
|
current_--;
|
|
Value v;
|
|
currentValue().swapPayload(v);
|
|
currentValue().setOffsetStart(current_ - begin_ - 1);
|
|
currentValue().setOffsetLimit(current_ - begin_);
|
|
break;
|
|
} // else, fall through ...
|
|
default:
|
|
currentValue().setOffsetStart(token.start_ - begin_);
|
|
currentValue().setOffsetLimit(token.end_ - begin_);
|
|
return addError("Syntax error: value, object or array expected.", token);
|
|
}
|
|
|
|
if (collectComments_) {
|
|
lastValueEnd_ = current_;
|
|
lastValue_ = ¤tValue();
|
|
}
|
|
|
|
--stackDepth_;
|
|
return successful;
|
|
}
|
|
|
|
// Reads one token into `token`. When comments are allowed, keeps reading
// until the token is something other than a comment.
void OurReader::skipCommentTokens(Token& token) {
  readToken(token);
  if (!features_.allowComments_)
    return;
  while (token.type_ == tokenComment)
    readToken(token);
}
|
|
|
|
// Skips leading whitespace, then classifies the next token by its first
// character and consumes it. token.start_/token.end_ delimit the consumed
// text. A token that fails to scan is reported as tokenError; the function
// itself always returns true.
bool OurReader::readToken(Token& token) {
  skipSpaces();
  token.start_ = current_;
  Char c = getNextChar();
  bool ok = true;
  switch (c) {
  case '{':
    token.type_ = tokenObjectBegin;
    break;
  case '}':
    token.type_ = tokenObjectEnd;
    break;
  case '[':
    token.type_ = tokenArrayBegin;
    break;
  case ']':
    token.type_ = tokenArrayEnd;
    break;
  case '"':
    token.type_ = tokenString;
    ok = readString();
    break;
  case '\'':
    if (features_.allowSingleQuotes_) {
      token.type_ = tokenString;
      ok = readStringSingleQuote();
    } else {
      // A single quote is not a valid token start when single-quoted strings
      // are disabled. It must not fall through into the comment case below,
      // otherwise input such as "'/ text" would be accepted as a comment.
      ok = false;
    }
    break;
  case '/':
    token.type_ = tokenComment;
    ok = readComment();
    break;
  case '0':
  case '1':
  case '2':
  case '3':
  case '4':
  case '5':
  case '6':
  case '7':
  case '8':
  case '9':
  case '-':
    token.type_ = tokenNumber;
    readNumber();
    break;
  case 't':
    token.type_ = tokenTrue;
    ok = match("rue", 3);
    break;
  case 'f':
    token.type_ = tokenFalse;
    ok = match("alse", 4);
    break;
  case 'n':
    token.type_ = tokenNull;
    ok = match("ull", 3);
    break;
  case ',':
    token.type_ = tokenArraySeparator;
    break;
  case ':':
    token.type_ = tokenMemberSeparator;
    break;
  case 0:
    // getNextChar() yields 0 once the input is exhausted.
    token.type_ = tokenEndOfStream;
    break;
  default:
    ok = false;
    break;
  }
  if (!ok)
    token.type_ = tokenError;
  token.end_ = current_;
  return true;
}
|
|
|
|
void OurReader::skipSpaces() {
|
|
while (current_ != end_) {
|
|
Char c = *current_;
|
|
if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
|
|
++current_;
|
|
else
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Checks whether the next `patternLength` characters of the input equal
// `pattern`. On success the characters are consumed; on failure current_ is
// left untouched.
bool OurReader::match(Location pattern, int patternLength) {
  const bool enoughInput = !(end_ - current_ < patternLength);
  if (!enoughInput)
    return false;
  for (int i = 0; i < patternLength; ++i) {
    if (current_[i] != pattern[i])
      return false;
  }
  current_ += patternLength;
  return true;
}
|
|
|
|
bool OurReader::readComment() {
|
|
Location commentBegin = current_ - 1;
|
|
Char c = getNextChar();
|
|
bool successful = false;
|
|
if (c == '*')
|
|
successful = readCStyleComment();
|
|
else if (c == '/')
|
|
successful = readCppStyleComment();
|
|
if (!successful)
|
|
return false;
|
|
|
|
if (collectComments_) {
|
|
CommentPlacement placement = commentBefore;
|
|
if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
|
|
if (c != '*' || !containsNewLine(commentBegin, current_))
|
|
placement = commentAfterOnSameLine;
|
|
}
|
|
|
|
addComment(commentBegin, current_, placement);
|
|
}
|
|
return true;
|
|
}
|
|
|
|
void
|
|
OurReader::addComment(Location begin, Location end, CommentPlacement placement) {
|
|
assert(collectComments_);
|
|
const std::string& normalized = normalizeEOL(begin, end);
|
|
if (placement == commentAfterOnSameLine) {
|
|
assert(lastValue_ != 0);
|
|
lastValue_->setComment(normalized, placement);
|
|
} else {
|
|
commentsBefore_ += normalized;
|
|
}
|
|
}
|
|
|
|
bool OurReader::readCStyleComment() {
|
|
while (current_ != end_) {
|
|
Char c = getNextChar();
|
|
if (c == '*' && *current_ == '/')
|
|
break;
|
|
}
|
|
return getNextChar() == '/';
|
|
}
|
|
|
|
bool OurReader::readCppStyleComment() {
|
|
while (current_ != end_) {
|
|
Char c = getNextChar();
|
|
if (c == '\n')
|
|
break;
|
|
if (c == '\r') {
|
|
// Consume DOS EOL. It will be normalized in addComment.
|
|
if (current_ != end_ && *current_ == '\n')
|
|
getNextChar();
|
|
// Break on Moc OS 9 EOL.
|
|
break;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
void OurReader::readNumber() {
|
|
const char *p = current_;
|
|
char c = '0'; // stopgap for already consumed character
|
|
// integral part
|
|
while (c >= '0' && c <= '9')
|
|
c = (current_ = p) < end_ ? *p++ : 0;
|
|
// fractional part
|
|
if (c == '.') {
|
|
c = (current_ = p) < end_ ? *p++ : 0;
|
|
while (c >= '0' && c <= '9')
|
|
c = (current_ = p) < end_ ? *p++ : 0;
|
|
}
|
|
// exponential part
|
|
if (c == 'e' || c == 'E') {
|
|
c = (current_ = p) < end_ ? *p++ : 0;
|
|
if (c == '+' || c == '-')
|
|
c = (current_ = p) < end_ ? *p++ : 0;
|
|
while (c >= '0' && c <= '9')
|
|
c = (current_ = p) < end_ ? *p++ : 0;
|
|
}
|
|
}
|
|
bool OurReader::readString() {
|
|
Char c = 0;
|
|
while (current_ != end_) {
|
|
c = getNextChar();
|
|
if (c == '\\')
|
|
getNextChar();
|
|
else if (c == '"')
|
|
break;
|
|
}
|
|
return c == '"';
|
|
}
|
|
|
|
|
|
bool OurReader::readStringSingleQuote() {
|
|
Char c = 0;
|
|
while (current_ != end_) {
|
|
c = getNextChar();
|
|
if (c == '\\')
|
|
getNextChar();
|
|
else if (c == '\'')
|
|
break;
|
|
}
|
|
return c == '\'';
|
|
}
|
|
|
|
// Parses the members of an object whose opening '{' is `tokenStart`, storing
// them into currentValue(). Each iteration reads a key (a string, or a number
// when numeric keys are allowed), the ':' separator, the member value, and
// then either ',' or '}'. On any error the reader skips ahead to the next '}'
// and returns false.
bool OurReader::readObject(Token& tokenStart) {
  Token keyToken;
  std::string key;
  Value emptyObject(objectValue);
  currentValue().swapPayload(emptyObject);
  currentValue().setOffsetStart(tokenStart.start_ - begin_);

  while (readToken(keyToken)) {
    // Skip comments that precede the member name.
    bool keyTokenOk = true;
    while (keyTokenOk && keyToken.type_ == tokenComment)
      keyTokenOk = readToken(keyToken);
    if (!keyTokenOk)
      break;

    // '}' is accepted here only while no (non-empty) key has been read.
    if (key.empty() && keyToken.type_ == tokenObjectEnd)
      return true;

    key = "";
    if (keyToken.type_ == tokenString) {
      if (!decodeString(keyToken, key))
        return recoverFromError(tokenObjectEnd);
    } else if (features_.allowNumericKeys_ && keyToken.type_ == tokenNumber) {
      Value numericKey;
      if (!decodeNumber(keyToken, numericKey))
        return recoverFromError(tokenObjectEnd);
      key = numericKey.asString();
    } else {
      break;
    }

    Token separator;
    const bool gotSeparator =
        readToken(separator) && separator.type_ == tokenMemberSeparator;
    if (!gotSeparator) {
      return addErrorAndRecover(
          "Missing ':' after object member name", separator, tokenObjectEnd);
    }

    if (key.length() >= (1U<<30)) throwRuntimeError("keylength >= 2^30");
    if (features_.rejectDupKeys_ && currentValue().isMember(key)) {
      std::string msg = "Duplicate key: '" + key + "'";
      return addErrorAndRecover(
          msg, keyToken, tokenObjectEnd);
    }

    // Parse the member value with the new member as the current node.
    Value& member = currentValue()[key];
    nodes_.push(&member);
    const bool valueOk = readValue();
    nodes_.pop();
    if (!valueOk) // error already set
      return recoverFromError(tokenObjectEnd);

    Token next;
    const bool gotNext = readToken(next);
    const bool acceptable = next.type_ == tokenObjectEnd ||
                            next.type_ == tokenArraySeparator ||
                            next.type_ == tokenComment;
    if (!gotNext || !acceptable) {
      return addErrorAndRecover(
          "Missing ',' or '}' in object declaration", next, tokenObjectEnd);
    }

    // Skip comments that follow the member value.
    bool nextTokenOk = true;
    while (nextTokenOk && next.type_ == tokenComment)
      nextTokenOk = readToken(next);
    if (next.type_ == tokenObjectEnd)
      return true;
  }

  return addErrorAndRecover(
      "Missing '}' or object member name", keyToken, tokenObjectEnd);
}
|
|
|
|
// Parses the elements of an array whose opening '[' is `tokenStart`, storing
// them into currentValue(). Elements are read with readValue() and must be
// separated by ','; comments are accepted after an element. On error the
// reader skips ahead to the next ']' and returns false.
bool OurReader::readArray(Token& tokenStart) {
  Value init(arrayValue);
  currentValue().swapPayload(init);
  currentValue().setOffsetStart(tokenStart.start_ - begin_);
  skipSpaces();
  // skipSpaces() may stop at end_, so check the bound before peeking.
  if (current_ != end_ && *current_ == ']') // empty array
  {
    Token endArray;
    readToken(endArray);
    return true;
  }
  int index = 0;
  for (;;) {
    // Parse the element with the new slot as the current node.
    Value& value = currentValue()[index++];
    nodes_.push(&value);
    bool ok = readValue();
    nodes_.pop();
    if (!ok) // error already set
      return recoverFromError(tokenArrayEnd);

    Token token;
    // Accept Comment after last item in the array.
    ok = readToken(token);
    while (token.type_ == tokenComment && ok) {
      ok = readToken(token);
    }
    bool badTokenType =
        (token.type_ != tokenArraySeparator && token.type_ != tokenArrayEnd);
    if (!ok || badTokenType) {
      return addErrorAndRecover(
          "Missing ',' or ']' in array declaration", token, tokenArrayEnd);
    }
    if (token.type_ == tokenArrayEnd)
      break;
  }
  return true;
}
|
|
|
|
// Decodes a number token and, on success, installs it as the payload of
// currentValue() together with the token's source offsets.
bool OurReader::decodeNumber(Token& token) {
  Value number;
  const bool ok = decodeNumber(token, number);
  if (ok) {
    Value& target = currentValue();
    target.swapPayload(number);
    target.setOffsetStart(token.start_ - begin_);
    target.setOffsetLimit(token.end_ - begin_);
  }
  return ok;
}
|
|
|
|
// Decodes the text of a number token into `decoded`.
//
// The token is first parsed as an integer. If it contains a non-digit
// character, or its magnitude does not fit the largest supported integer
// type, decoding is delegated to decodeDouble().
bool OurReader::decodeNumber(Token& token, Value& decoded) {
  Location current = token.start_;
  bool isNegative = *current == '-';
  if (isNegative)
    ++current;
  // TODO: Help the compiler do the div and mod at compile time or get rid of them.
  // Largest magnitude that still fits. For negative numbers this is
  // |minLargestInt|, computed as -(min + 1) + 1 in unsigned arithmetic
  // because negating minLargestInt directly overflows the signed type.
  Value::LargestUInt maxIntegerValue =
      isNegative ? Value::LargestUInt(-(Value::minLargestInt + 1)) + 1
                 : Value::maxLargestUInt;
  Value::LargestUInt threshold = maxIntegerValue / 10;
  Value::LargestUInt value = 0;
  while (current < token.end_) {
    Char c = *current++;
    if (c < '0' || c > '9')
      return decodeDouble(token, decoded);
    Value::UInt digit(c - '0');
    if (value >= threshold) {
      // We've hit or exceeded the max value divided by 10 (rounded down). If
      // a) we've only just touched the limit, b) this is the last digit, and
      // c) it's small enough to fit in that rounding delta, we're okay.
      // Otherwise treat this number as a double to avoid overflow.
      if (value > threshold || current != token.end_ ||
          digit > maxIntegerValue % 10) {
        return decodeDouble(token, decoded);
      }
    }
    value = value * 10 + digit;
  }
  if (isNegative) {
    // The magnitude |minLargestInt| has no positive signed counterpart, so
    // it cannot be converted and negated; assign the minimum directly.
    if (value == maxIntegerValue)
      decoded = Value::LargestInt(Value::minLargestInt);
    else
      decoded = -Value::LargestInt(value);
  } else if (value <= Value::LargestUInt(Value::maxInt))
    decoded = Value::LargestInt(value);
  else
    decoded = value;
  return true;
}
|
|
|
|
// Decodes a token as a floating-point number and, on success, installs it as
// the payload of currentValue() together with the token's source offsets.
bool OurReader::decodeDouble(Token& token) {
  Value number;
  const bool ok = decodeDouble(token, number);
  if (ok) {
    Value& target = currentValue();
    target.swapPayload(number);
    target.setOffsetStart(token.start_ - begin_);
    target.setOffsetLimit(token.end_ - begin_);
  }
  return ok;
}
|
|
|
|
// Converts the token text to a double with sscanf("%lf") and stores it in
// `decoded`. Short tokens are copied into a fixed stack buffer; longer ones
// go through a std::string. Records an error and returns false when sscanf
// does not produce exactly one value.
bool OurReader::decodeDouble(Token& token, Value& decoded) {
  const int kStackBufferSize = 32;
  const int tokenLength = int(token.end_ - token.start_);

  // Sanity check to avoid buffer overflow exploits.
  if (tokenLength < 0) {
    return addError("Unable to parse token length", token);
  }

  // The format is kept in a mutable array rather than passed as a string
  // constant, as a string constant given to sscanf can cause hard to debug
  // crashes on OS X. See:
  // http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html
  char format[] = "%lf";

  double parsed = 0;
  int fieldsRead;
  if (tokenLength > kStackBufferSize) {
    std::string text(token.start_, token.end_);
    fieldsRead = sscanf(text.c_str(), format, &parsed);
  } else {
    Char stackBuffer[kStackBufferSize + 1];
    memcpy(stackBuffer, token.start_, tokenLength);
    stackBuffer[tokenLength] = 0;
    fieldsRead = sscanf(stackBuffer, format, &parsed);
  }

  if (fieldsRead != 1) {
    return addError("'" + std::string(token.start_, token.end_) +
                        "' is not a number.",
                    token);
  }
  decoded = parsed;
  return true;
}
|
|
|
|
// Decodes a string token and, on success, installs it as the payload of
// currentValue() together with the token's source offsets.
bool OurReader::decodeString(Token& token) {
  std::string text;
  if (!decodeString(token, text))
    return false;
  Value asValue(text);
  Value& target = currentValue();
  target.swapPayload(asValue);
  target.setOffsetStart(token.start_ - begin_);
  target.setOffsetLimit(token.end_ - begin_);
  return true;
}
|
|
|
|
// Appends the unescaped content of a string token to `decoded`. The first
// and last characters of the token (the quotes) are excluded. Backslash
// escapes are translated; \u escapes are converted to UTF-8. Records an
// error and returns false on an empty or unknown escape sequence.
bool OurReader::decodeString(Token& token, std::string& decoded) {
  decoded.reserve(token.end_ - token.start_ - 2);
  Location cursor = token.start_ + 1; // skip '"'
  Location stop = token.end_ - 1;     // do not include '"'
  while (cursor != stop) {
    const Char ch = *cursor++;
    if (ch == '"')
      break;
    if (ch != '\\') {
      decoded += ch;
      continue;
    }

    if (cursor == stop)
      return addError("Empty escape sequence in string", token, cursor);
    const Char escape = *cursor++;
    switch (escape) {
    case '"':
    case '/':
    case '\\':
      // These escapes stand for the character itself.
      decoded += escape;
      break;
    case 'b':
      decoded += '\b';
      break;
    case 'f':
      decoded += '\f';
      break;
    case 'n':
      decoded += '\n';
      break;
    case 'r':
      decoded += '\r';
      break;
    case 't':
      decoded += '\t';
      break;
    case 'u': {
      unsigned int codePoint;
      if (!decodeUnicodeCodePoint(token, cursor, stop, codePoint))
        return false;
      decoded += codePointToUTF8(codePoint);
      break;
    }
    default:
      return addError("Bad escape sequence in string", token, cursor);
    }
  }
  return true;
}
|
|
|
|
// Decodes the four hex digits following "\u" into `unicode`. If the result
// lies in 0xD800..0xDBFF, a second "\uXXXX" escape must follow and the two
// are combined into a single code point. `current` is advanced past
// everything consumed.
bool OurReader::decodeUnicodeCodePoint(Token& token,
                                       Location& current,
                                       Location end,
                                       unsigned int& unicode) {
  if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
    return false;

  const bool isLeadSurrogate = unicode >= 0xD800 && unicode <= 0xDBFF;
  if (!isLeadSurrogate)
    return true;

  // surrogate pairs
  if (end - current < 6) {
    return addError(
        "additional six characters expected to parse unicode surrogate pair.",
        token,
        current);
  }
  if (*(current++) != '\\' || *(current++) != 'u') {
    return addError("expecting another \\u token to begin the second half of "
                    "a unicode surrogate pair",
                    token,
                    current);
  }

  unsigned int trailSurrogate;
  if (!decodeUnicodeEscapeSequence(token, current, end, trailSurrogate))
    return false;
  unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (trailSurrogate & 0x3FF);
  return true;
}
|
|
|
|
// Reads exactly four hexadecimal digits starting at `current` and stores
// their value in `unicode`, advancing `current` past the digits read.
// Records an error and returns false if fewer than four characters remain
// or a character is not a hex digit.
bool OurReader::decodeUnicodeEscapeSequence(Token& token,
                                            Location& current,
                                            Location end,
                                            unsigned int& unicode) {
  if (end - current < 4) {
    return addError(
        "Bad unicode escape sequence in string: four digits expected.",
        token,
        current);
  }

  unicode = 0;
  for (int remaining = 4; remaining > 0; --remaining) {
    const Char c = *current++;
    unicode *= 16;
    int digit;
    if (c >= '0' && c <= '9') {
      digit = c - '0';
    } else if (c >= 'a' && c <= 'f') {
      digit = c - 'a' + 10;
    } else if (c >= 'A' && c <= 'F') {
      digit = c - 'A' + 10;
    } else {
      return addError(
          "Bad unicode escape sequence in string: hexadecimal digit expected.",
          token,
          current);
    }
    unicode += digit;
  }
  return true;
}
|
|
|
|
bool
|
|
OurReader::addError(const std::string& message, Token& token, Location extra) {
|
|
ErrorInfo info;
|
|
info.token_ = token;
|
|
info.message_ = message;
|
|
info.extra_ = extra;
|
|
errors_.push_back(info);
|
|
return false;
|
|
}
|
|
|
|
// Skips tokens until one of type `skipUntilToken` or the end of the stream
// is reached. Errors recorded while skipping are discarded by trimming
// errors_ back to its size on entry. Always returns false.
bool OurReader::recoverFromError(TokenType skipUntilToken) {
  const int errorCountOnEntry = int(errors_.size());
  Token skipped;
  do {
    if (!readToken(skipped))
      errors_.resize(errorCountOnEntry); // discard errors caused by recovery
  } while (skipped.type_ != skipUntilToken &&
           skipped.type_ != tokenEndOfStream);
  errors_.resize(errorCountOnEntry);
  return false;
}
|
|
|
|
bool OurReader::addErrorAndRecover(const std::string& message,
|
|
Token& token,
|
|
TokenType skipUntilToken) {
|
|
addError(message, token);
|
|
return recoverFromError(skipUntilToken);
|
|
}
|
|
|
|
// Returns the value on top of the node stack, i.e. the one being filled in.
Value& OurReader::currentValue() {
  Value* const top = nodes_.top();
  return *top;
}
|
|
|
|
// Returns the character at current_ and advances past it, or returns 0
// without advancing when the end of the input has been reached.
OurReader::Char OurReader::getNextChar() {
  return (current_ == end_) ? Char(0) : *current_++;
}
|
|
|
|
// Computes the 1-based line and column of `location` by scanning the
// document from begin_. "\r\n", a lone "\r" and a lone "\n" each count as
// one line break.
void OurReader::getLocationLineAndColumn(Location location,
                                         int& line,
                                         int& column) const {
  Location current = begin_;
  Location lastLineStart = current;
  line = 0;
  while (current < location && current != end_) {
    Char c = *current++;
    if (c == '\r') {
      // The '\r' may be the last character of the document, so check the
      // bound before peeking at the next character.
      if (current != end_ && *current == '\n')
        ++current;
      lastLineStart = current;
      ++line;
    } else if (c == '\n') {
      lastLineStart = current;
      ++line;
    }
  }
  // column & line start at 1
  column = int(location - lastLineStart) + 1;
  ++line;
}
|
|
|
|
// Formats the position of `location` as "Line <n>, Column <m>".
std::string OurReader::getLocationLineAndColumn(Location location) const {
  int line;
  int column;
  getLocationLineAndColumn(location, line, column);

  char text[18 + 16 + 16 + 1];
#if defined(_MSC_VER) && defined(__STDC_SECURE_LIB__)
#if defined(WINCE)
  _snprintf(text, sizeof(text), "Line %d, Column %d", line, column);
#else
  sprintf_s(text, sizeof(text), "Line %d, Column %d", line, column);
#endif
#else
  snprintf(text, sizeof(text), "Line %d, Column %d", line, column);
#endif
  return std::string(text);
}
|
|
|
|
std::string OurReader::getFormattedErrorMessages() const {
|
|
std::string formattedMessage;
|
|
for (Errors::const_iterator itError = errors_.begin();
|
|
itError != errors_.end();
|
|
++itError) {
|
|
const ErrorInfo& error = *itError;
|
|
formattedMessage +=
|
|
"* " + getLocationLineAndColumn(error.token_.start_) + "\n";
|
|
formattedMessage += " " + error.message_ + "\n";
|
|
if (error.extra_)
|
|
formattedMessage +=
|
|
"See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
|
|
}
|
|
return formattedMessage;
|
|
}
|
|
|
|
std::vector<OurReader::StructuredError> OurReader::getStructuredErrors() const {
|
|
std::vector<OurReader::StructuredError> allErrors;
|
|
for (Errors::const_iterator itError = errors_.begin();
|
|
itError != errors_.end();
|
|
++itError) {
|
|
const ErrorInfo& error = *itError;
|
|
OurReader::StructuredError structured;
|
|
structured.offset_start = error.token_.start_ - begin_;
|
|
structured.offset_limit = error.token_.end_ - begin_;
|
|
structured.message = error.message_;
|
|
allErrors.push_back(structured);
|
|
}
|
|
return allErrors;
|
|
}
|
|
|
|
// Records an error located at the source range stored in `value`
// (getOffsetStart()..getOffsetLimit(), both relative to begin_).
// Returns false, without recording anything, if either offset lies beyond
// the length of the parsed document; otherwise returns true.
bool OurReader::pushError(const Value& value, const std::string& message) {
  size_t length = end_ - begin_;
  if(value.getOffsetStart() > length
    || value.getOffsetLimit() > length)
    return false;
  Token token;
  token.type_ = tokenError;
  token.start_ = begin_ + value.getOffsetStart();
  // Offsets are relative to begin_; basing the limit on end_ would place it
  // beyond the document.
  token.end_ = begin_ + value.getOffsetLimit();
  ErrorInfo info;
  info.token_ = token;
  info.message_ = message;
  info.extra_ = 0;
  errors_.push_back(info);
  return true;
}
|
|
|
|
bool OurReader::pushError(const Value& value, const std::string& message, const Value& extra) {
|
|
size_t length = end_ - begin_;
|
|
if(value.getOffsetStart() > length
|
|
|| value.getOffsetLimit() > length
|
|
|| extra.getOffsetLimit() > length)
|
|
return false;
|
|
Token token;
|
|
token.type_ = tokenError;
|
|
token.start_ = begin_ + value.getOffsetStart();
|
|
token.end_ = begin_ + value.getOffsetLimit();
|
|
ErrorInfo info;
|
|
info.token_ = token;
|
|
info.message_ = message;
|
|
info.extra_ = begin_ + extra.getOffsetStart();
|
|
errors_.push_back(info);
|
|
return true;
|
|
}
|
|
|
|
bool OurReader::good() const {
|
|
return !errors_.size();
|
|
}
|
|
|
|
|
|
class OurCharReader : public CharReader {
|
|
bool const collectComments_;
|
|
OurReader reader_;
|
|
public:
|
|
OurCharReader(
|
|
bool collectComments,
|
|
OurFeatures const& features)
|
|
: collectComments_(collectComments)
|
|
, reader_(features)
|
|
{}
|
|
virtual bool parse(
|
|
char const* beginDoc, char const* endDoc,
|
|
Value* root, std::string* errs) {
|
|
bool ok = reader_.parse(beginDoc, endDoc, *root, collectComments_);
|
|
if (errs) {
|
|
*errs = reader_.getFormattedErrorMessages();
|
|
}
|
|
return ok;
|
|
}
|
|
};
|
|
|
|
// Creates a builder whose settings_ are initialized by setDefaults().
CharReaderBuilder::CharReaderBuilder() { setDefaults(&settings_); }
|
|
// Nothing to release explicitly.
CharReaderBuilder::~CharReaderBuilder() {}
|
|
// Allocates a new OurCharReader configured from settings_. The caller
// receives the raw pointer returned by `new`.
CharReader* CharReaderBuilder::newCharReader() const
{
  OurFeatures features = OurFeatures::all();
  features.allowComments_ = settings_["allowComments"].asBool();
  features.strictRoot_ = settings_["strictRoot"].asBool();
  features.allowDroppedNullPlaceholders_ =
      settings_["allowDroppedNullPlaceholders"].asBool();
  features.allowNumericKeys_ = settings_["allowNumericKeys"].asBool();
  features.allowSingleQuotes_ = settings_["allowSingleQuotes"].asBool();
  features.stackLimit_ = settings_["stackLimit"].asInt();
  features.failIfExtra_ = settings_["failIfExtra"].asBool();
  features.rejectDupKeys_ = settings_["rejectDupKeys"].asBool();

  bool const collectComments = settings_["collectComments"].asBool();
  return new OurCharReader(collectComments, features);
}
|
|
// Replaces the contents of *valid_keys with the setting names recognized by
// CharReaderBuilder.
static void getValidReaderKeys(std::set<std::string>* valid_keys)
{
  static const char* const kReaderKeys[] = {
    "collectComments",
    "allowComments",
    "strictRoot",
    "allowDroppedNullPlaceholders",
    "allowNumericKeys",
    "allowSingleQuotes",
    "stackLimit",
    "failIfExtra",
    "rejectDupKeys"
  };
  const size_t keyCount = sizeof(kReaderKeys) / sizeof(kReaderKeys[0]);

  valid_keys->clear();
  for (size_t i = 0; i < keyCount; ++i)
    valid_keys->insert(kReaderKeys[i]);
}
|
|
// Copies every entry of settings_ whose key is not a recognized reader
// setting into *invalid (when non-null). Returns true when the collected
// value ends up with no entries.
bool CharReaderBuilder::validate(Json::Value* invalid) const
{
  Json::Value scratch; // used when the caller does not want the details
  Json::Value& rejected = invalid ? *invalid : scratch;

  std::set<std::string> known;
  getValidReaderKeys(&known);

  Value::Members const names = settings_.getMemberNames();
  for (Value::Members::const_iterator it = names.begin(); it != names.end();
       ++it) {
    std::string const& name = *it;
    if (known.count(name) == 0)
      rejected[name] = settings_[name];
  }
  return rejected.size() == 0u;
}
|
|
// Gives mutable access to the setting stored under `key`.
Value& CharReaderBuilder::operator[](std::string key)
{
  Value& entry = settings_[key];
  return entry;
}
|
|
// static
|
|
// Overwrites the listed entries of *settings with their strict-mode values.
// Entries not listed here are left as they are.
void CharReaderBuilder::strictMode(Json::Value* settings)
{
  Json::Value& s = *settings;
//! [CharReaderBuilderStrictMode]
  s["allowComments"] = false;
  s["strictRoot"] = true;
  s["allowDroppedNullPlaceholders"] = false;
  s["allowNumericKeys"] = false;
  s["allowSingleQuotes"] = false;
  s["failIfExtra"] = true;
  s["rejectDupKeys"] = true;
//! [CharReaderBuilderStrictMode]
}
|
|
// static
|
|
// Overwrites the listed entries of *settings with the builder's default
// values.
void CharReaderBuilder::setDefaults(Json::Value* settings)
{
  Json::Value& s = *settings;
//! [CharReaderBuilderDefaults]
  s["collectComments"] = true;
  s["allowComments"] = true;
  s["strictRoot"] = false;
  s["allowDroppedNullPlaceholders"] = false;
  s["allowNumericKeys"] = false;
  s["allowSingleQuotes"] = false;
  s["stackLimit"] = 1000;
  s["failIfExtra"] = false;
  s["rejectDupKeys"] = false;
//! [CharReaderBuilderDefaults]
}
|
|
|
|
//////////////////////////////////
|
|
// global functions
|
|
|
|
// Reads the whole of `sin` into memory and parses it with a reader obtained
// from `fact`. The result goes to *root and error text to *errs, both as
// handled by CharReader::parse(). Returns that call's result.
bool parseFromStream(
    CharReader::Factory const& fact, std::istream& sin,
    Value* root, std::string* errs)
{
  std::ostringstream collected;
  collected << sin.rdbuf();
  std::string const doc = collected.str();
  // Note that we do not actually need a null-terminator.
  char const* const first = doc.data();
  char const* const last = first + doc.size();
  CharReaderPtr const reader(fact.newCharReader());
  return reader->parse(first, last, root, errs);
}
|
|
|
|
std::istream& operator>>(std::istream& sin, Value& root) {
|
|
CharReaderBuilder b;
|
|
std::string errs;
|
|
bool ok = parseFromStream(b, sin, &root, &errs);
|
|
if (!ok) {
|
|
fprintf(stderr,
|
|
"Error from reader: %s",
|
|
errs.c_str());
|
|
|
|
throwRuntimeError("reader error");
|
|
}
|
|
return sin;
|
|
}
|
|
|
|
} // namespace Json
|