pcre/pcre_scanner.cc

200 lines
5.4 KiB
C++

// Copyright (c) 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: Sanjay Ghemawat
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <vector>
#include <assert.h>
#include "pcrecpp_internal.h"
#include "pcre_scanner.h"
using std::vector;
namespace pcrecpp {
Scanner::Scanner()
: data_(),
input_(data_),
skip_(NULL),
should_skip_(false),
skip_repeat_(false),
save_comments_(false),
comments_(NULL),
comments_offset_(0) {
}
Scanner::Scanner(const string& in)
: data_(in),
input_(data_),
skip_(NULL),
should_skip_(false),
skip_repeat_(false),
save_comments_(false),
comments_(NULL),
comments_offset_(0) {
}
Scanner::~Scanner() {
delete skip_;
delete comments_;
}
void Scanner::SetSkipExpression(const char* re) {
delete skip_;
if (re != NULL) {
skip_ = new RE(re);
should_skip_ = true;
skip_repeat_ = true;
ConsumeSkip();
} else {
skip_ = NULL;
should_skip_ = false;
skip_repeat_ = false;
}
}
void Scanner::Skip(const char* re) {
delete skip_;
if (re != NULL) {
skip_ = new RE(re);
should_skip_ = true;
skip_repeat_ = false;
ConsumeSkip();
} else {
skip_ = NULL;
should_skip_ = false;
skip_repeat_ = false;
}
}
void Scanner::DisableSkip() {
assert(skip_ != NULL);
should_skip_ = false;
}
void Scanner::EnableSkip() {
assert(skip_ != NULL);
should_skip_ = true;
ConsumeSkip();
}
int Scanner::LineNumber() const {
// TODO: Make it more efficient by keeping track of the last point
// where we computed line numbers and counting newlines since then.
// We could use std:count, but not all systems have it. :-(
int count = 1;
for (const char* p = data_.data(); p < input_.data(); ++p)
if (*p == '\n')
++count;
return count;
}
int Scanner::Offset() const {
return (int)(input_.data() - data_.c_str());
}
bool Scanner::LookingAt(const RE& re) const {
int consumed;
return re.DoMatch(input_, RE::ANCHOR_START, &consumed, 0, 0);
}
bool Scanner::Consume(const RE& re,
const Arg& arg0,
const Arg& arg1,
const Arg& arg2) {
const bool result = re.Consume(&input_, arg0, arg1, arg2);
if (result && should_skip_) ConsumeSkip();
return result;
}
// helper function to consume *skip_ and honour save_comments_
void Scanner::ConsumeSkip() {
const char* start_data = input_.data();
while (skip_->Consume(&input_)) {
if (!skip_repeat_) {
// Only one skip allowed.
break;
}
}
if (save_comments_) {
if (comments_ == NULL) {
comments_ = new vector<StringPiece>;
}
// already pointing one past end, so no need to +1
int length = (int)(input_.data() - start_data);
if (length > 0) {
comments_->push_back(StringPiece(start_data, length));
}
}
}
void Scanner::GetComments(int start, int end, vector<StringPiece> *ranges) {
// short circuit out if we've not yet initialized comments_
// (e.g., when save_comments is false)
if (!comments_) {
return;
}
// TODO: if we guarantee that comments_ will contain StringPieces
// that are ordered by their start, then we can do a binary search
// for the first StringPiece at or past start and then scan for the
// ones contained in the range, quit early (use equal_range or
// lower_bound)
for (vector<StringPiece>::const_iterator it = comments_->begin();
it != comments_->end(); ++it) {
if ((it->data() >= data_.c_str() + start &&
it->data() + it->size() <= data_.c_str() + end)) {
ranges->push_back(*it);
}
}
}
void Scanner::GetNextComments(vector<StringPiece> *ranges) {
// short circuit out if we've not yet initialized comments_
// (e.g., when save_comments is false)
if (!comments_) {
return;
}
for (vector<StringPiece>::const_iterator it =
comments_->begin() + comments_offset_;
it != comments_->end(); ++it) {
ranges->push_back(*it);
++comments_offset_;
}
}
} // namespace pcrecpp