Skip to content
Permalink
9bfb9ba527
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
1565 lines (1436 sloc) 42.5 KB
;(function (sax) { // wrapper for non-node envs
// Public entry points attached to the export object.
// `sax.parser(strict, opt)` is a factory equivalent to `new sax.SAXParser(strict, opt)`.
sax.parser = function (strict, opt) {
  return new SAXParser(strict, opt)
}
sax.SAXParser = SAXParser
sax.SAXStream = SAXStream
sax.createStream = createStream
// Upper bound, in characters, for any single internal buffer.
// A parser first calls checkBufferLength() once its position reaches this
// value; every check then schedules the next one at the earliest position
// where an overrun could occur, so checks are rare but never skipped.
// Buffers are examined at most once per write(), so a single very large
// chunk can temporarily exceed the limit; chunk size is under the caller's
// control, so that is accepted.
// Set to Infinity to have unlimited buffers.
sax.MAX_BUFFER_LENGTH = 64 * 1024

// Names of the string buffers kept on a parser instance. clearBuffers()
// resets all of them and checkBufferLength() measures all of them.
var buffers = [
  'comment',
  'sgmlDecl',
  'textNode',
  'tagName',
  'doctype',
  'procInstName',
  'procInstBody',
  'entity',
  'attribName',
  'attribValue',
  'cdata',
  'script'
]
// Event names understood by the parser. A handler for event `x` is the
// parser property `onx`; SAXStream re-emits every one of these except
// 'error' and 'end', which it wires up itself.
sax.EVENTS = [
  'text', 'processinginstruction', 'sgmldeclaration', 'doctype', 'comment',
  'opentagstart', 'attribute', 'opentag', 'closetag',
  'opencdata', 'cdata', 'closecdata',
  'error', 'end', 'ready',
  'script',
  'opennamespace', 'closenamespace'
]
/**
 * Parser constructor. Works with or without `new`. It is also re-applied to
 * an existing parser by end() to reset it for the next document.
 *
 * @param {boolean} strict  truthy to report well-formedness problems as errors
 * @param {Object} [opt]    options object; it is stored on the parser as-is
 *                          and its `lowercase` field is normalised in place
 */
function SAXParser (strict, opt) {
  if (!(this instanceof SAXParser)) {
    return new SAXParser(strict, opt)
  }

  var parser = this
  var options = opt || {}

  clearBuffers(parser)
  parser.c = ''
  parser.q = ''
  parser.bufferCheckPosition = sax.MAX_BUFFER_LENGTH

  parser.opt = options
  // `lowercasetags` is accepted as an alias of `lowercase`
  options.lowercase = options.lowercase || options.lowercasetags
  parser.looseCase = options.lowercase ? 'toLowerCase' : 'toUpperCase'

  parser.tags = []
  parser.sawRoot = false
  parser.closedRoot = false
  parser.closed = false
  parser.error = null
  parser.tag = null
  parser.strict = !!strict
  parser.noscript = !!(strict || options.noscript)
  parser.state = S.BEGIN

  parser.strictEntities = options.strictEntities
  // per-parser entity table inheriting from the shared one
  parser.ENTITIES = parser.strictEntities
    ? Object.create(sax.XML_ENTITIES)
    : Object.create(sax.ENTITIES)
  parser.attribList = []

  // Namespace bindings form a prototype chain: each tag's `ns` object
  // inherits from its parent's, and the chain is rooted here.
  if (options.xmlns) {
    parser.ns = Object.create(rootNS)
  }

  // Position tracking is on unless explicitly disabled; it is mostly used
  // for error reporting.
  parser.trackPosition = options.position !== false
  if (parser.trackPosition) {
    parser.column = 0
    parser.line = 0
    parser.position = 0
  }

  emit(parser, 'onready')
}
// Fallback for engines without Object.create. Only the prototype argument
// is honoured; a property-descriptor map passed as second argument is ignored.
if (!Object.create) {
  Object.create = function (o) {
    var Surrogate = function () {}
    Surrogate.prototype = o
    return new Surrogate()
  }
}
// Fallback for engines without Object.keys: collects the object's own
// enumerable property names.
if (!Object.keys) {
  Object.keys = function (o) {
    var names = []
    for (var key in o) {
      if (o.hasOwnProperty(key)) {
        names.push(key)
      }
    }
    return names
  }
}
/**
 * Inspects every parser buffer once. Oversized text, cdata and script
 * buffers are emitted early so they stop growing; any other oversized
 * buffer is reported through error(). Finally the position of the next
 * check is stored in `parser.bufferCheckPosition`.
 *
 * @param {SAXParser} parser
 */
function checkBufferLength (parser) {
  var limit = Math.max(sax.MAX_BUFFER_LENGTH, 10)
  var longest = 0

  buffers.forEach(function (name) {
    // measured before any flush below, so a flushed buffer still counts
    // toward `longest`
    var size = parser[name].length

    if (size > limit) {
      // Text/cdata/script content can legitimately get big; emit what we
      // have so far instead of failing.
      if (name === 'textNode') {
        closeText(parser)
      } else if (name === 'cdata') {
        emitNode(parser, 'oncdata', parser.cdata)
        parser.cdata = ''
      } else if (name === 'script') {
        emitNode(parser, 'onscript', parser.script)
        parser.script = ''
      } else {
        error(parser, 'Max buffer length exceeded: ' + name)
      }
    }

    longest = Math.max(longest, size)
  })

  // The fullest buffer needs at least this many more characters to overrun.
  var headroom = sax.MAX_BUFFER_LENGTH - longest
  parser.bufferCheckPosition = headroom + parser.position
}
/**
 * Resets every buffer named in `buffers` to the empty string.
 *
 * @param {SAXParser} parser
 */
function clearBuffers (parser) {
  buffers.forEach(function (name) {
    parser[name] = ''
  })
}
/**
 * Emits whatever text, cdata and script content is currently buffered and
 * empties those buffers.
 *
 * @param {SAXParser} parser
 */
function flushBuffers (parser) {
  closeText(parser)

  var pending = [
    ['cdata', 'oncdata'],
    ['script', 'onscript']
  ]
  for (var k = 0; k < pending.length; k++) {
    var field = pending[k][0]
    var handler = pending[k][1]
    if (parser[field] !== '') {
      emitNode(parser, handler, parser[field])
      parser[field] = ''
    }
  }
}
// Instance methods of SAXParser.
SAXParser.prototype = {
  // finish the current document (delegates to the module-level end())
  end: function () {
    end(this)
  },
  // feed a chunk to the parser
  write: write,
  // clear a stored error so that write() can be called again
  resume: function () {
    this.error = null
    return this
  },
  // write(null) is the "end of input" signal
  close: function () {
    return this.write(null)
  },
  // emit buffered text/cdata/script immediately
  flush: function () {
    flushBuffers(this)
  }
}
// Base class for SAXStream: the `Stream` export of the 'stream' module when
// it can be required, otherwise an empty constructor.
var Stream = (function () {
  try {
    return require('stream').Stream
  } catch (ex) {
    return function () {}
  }
})()
// Events SAXStream forwards generically; 'error' and 'end' are excluded
// because the SAXStream constructor installs dedicated handlers for them.
var streamWraps = sax.EVENTS.filter(function (ev) {
  return !(ev === 'error' || ev === 'end')
})
/**
 * Factory for SAXStream.
 *
 * @param {boolean} strict  forwarded to the SAXStream constructor
 * @param {Object} [opt]    forwarded to the SAXStream constructor
 * @returns {SAXStream}
 */
function createStream (strict, opt) {
  var stream = new SAXStream(strict, opt)
  return stream
}
/**
 * Stream wrapper around a SAXParser. Works with or without `new`.
 * The wrapped parser is available as `_parser`. For every event in
 * `streamWraps` an accessor property `on<event>` is defined on the stream
 * that proxies to the parser's handler of the same name.
 *
 * @param {boolean} strict  passed to SAXParser
 * @param {Object} [opt]    passed to SAXParser
 */
function SAXStream (strict, opt) {
  if (!(this instanceof SAXStream)) {
    return new SAXStream(strict, opt)
  }

  Stream.apply(this)

  var me = this
  me._parser = new SAXParser(strict, opt)
  me.writable = true
  me.readable = true

  me._parser.onend = function () {
    me.emit('end')
  }

  me._parser.onerror = function (er) {
    me.emit('error', er)
    // Reaching this line means emit() did not throw, i.e. the error was
    // handled; clear it so that writing can continue.
    me._parser.error = null
  }

  // created lazily by write() when the first Buffer arrives
  me._decoder = null

  streamWraps.forEach(function (ev) {
    var readHandler = function () {
      return me._parser['on' + ev]
    }
    var assignHandler = function (h) {
      if (h) {
        me.on(ev, h)
        return
      }
      // a falsy value removes every listener for the event
      me.removeAllListeners(ev)
      me._parser['on' + ev] = h
      return h
    }
    Object.defineProperty(me, 'on' + ev, {
      get: readHandler,
      set: assignHandler,
      enumerable: true,
      configurable: false
    })
  })
}
// SAXStream inherits from Stream; `constructor` is restored as a
// non-enumerable property.
SAXStream.prototype = Object.create(Stream.prototype, {
  constructor: { value: SAXStream }
})
/**
 * Feeds a chunk to the wrapped parser and re-emits it as a 'data' event.
 * Buffers are decoded as UTF-8 through a StringDecoder that is created on
 * first use and kept on the stream, so multi-byte sequences may be split
 * across chunks.
 *
 * @param {Buffer|string|Object} data  chunk; non-Buffer values are passed
 *                                     to the parser via `toString()`
 * @returns {boolean} always true
 */
SAXStream.prototype.write = function (data) {
  var isBinary = typeof Buffer === 'function' &&
    typeof Buffer.isBuffer === 'function' &&
    Buffer.isBuffer(data)

  if (isBinary) {
    if (!this._decoder) {
      var StringDecoder = require('string_decoder').StringDecoder
      this._decoder = new StringDecoder('utf8')
    }
    data = this._decoder.write(data)
  }

  var text = data.toString()
  this._parser.write(text)
  // 'data' carries the decoded chunk (or the original non-Buffer value)
  this.emit('data', data)
  return true
}
/**
 * Ends the stream: writes the optional final chunk, flushes whatever the
 * UTF-8 decoder is still holding, then ends the wrapped parser.
 *
 * @param {Buffer|string} [chunk]  optional last chunk; ignored when empty
 * @returns {boolean} always true
 */
SAXStream.prototype.end = function (chunk) {
  if (chunk && chunk.length) {
    this.write(chunk)
  }
  // A decoder only exists if a Buffer was written. It may still hold the
  // leading bytes of an incomplete multi-byte sequence; without this flush
  // those bytes would be dropped silently at end of input.
  if (this._decoder) {
    var tail = this._decoder.end()
    if (tail) {
      this._parser.write(tail)
      this.emit('data', tail)
    }
  }
  this._parser.end()
  return true
}
/**
 * Registers a listener. The first time a listener is added for one of the
 * forwarded parser events (and the parser has no handler for it yet), a
 * parser handler is installed that re-emits the event on the stream.
 *
 * @param {string} ev         event name
 * @param {Function} handler  listener
 * @returns whatever Stream.prototype.on returns
 */
SAXStream.prototype.on = function (ev, handler) {
  var me = this
  var hook = 'on' + ev

  if (!me._parser[hook] && streamWraps.indexOf(ev) !== -1) {
    me._parser[hook] = function () {
      // forward all handler arguments, prefixed by the event name
      var args = Array.prototype.slice.call(arguments)
      args.unshift(ev)
      me.emit.apply(me, args)
    }
  }

  return Stream.prototype.on.call(me, ev, handler)
}
// this really needs to be replaced with character classes.
// XML allows all manner of ridiculous numbers and digits.

// Keywords matched (case-insensitively) against the text following "<!".
var CDATA = '[CDATA['
var DOCTYPE = 'DOCTYPE'
// Namespace URIs that the reserved prefixes "xml" and "xmlns" must be bound to.
var XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'
var XMLNS_NAMESPACE = 'http://www.w3.org/2000/xmlns/'
// Root of the namespace prototype chain used in xmlns mode.
var rootNS = { xml: XML_NAMESPACE, xmlns: XMLNS_NAMESPACE }
// http://www.w3.org/TR/REC-xml/#NT-NameStartChar
// This implementation works on strings, a single character at a time
// as such, it cannot ever support astral-plane characters (10000-EFFFF)
// without a significant breaking change to either this parser, or the
// JavaScript language. Implementation of an emoji-capable xml parser
// is left as an exercise for the reader.
// nameStart: characters that may begin a tag or attribute name.
var nameStart = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/
// nameBody: nameStart plus the extra characters allowed after the first one
// (middle dot, combining marks, '.', digits, '-').
var nameBody = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040.\d-]/
// entityStart / entityBody: same as the two classes above, with '#' added so
// that numeric character references (&#...;) are accepted as entity names.
var entityStart = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/
var entityBody = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040.\d-]/
/**
 * @param {string} c  a single character (or '' at end of chunk)
 * @returns {boolean} true for space, line feed, carriage return or tab
 */
function isWhitespace (c) {
  switch (c) {
    case ' ':
    case '\n':
    case '\r':
    case '\t':
      return true
    default:
      return false
  }
}
/**
 * @param {string} c  a single character
 * @returns {boolean} true for a double or single quote character
 */
function isQuote (c) {
  if (c === '\'') {
    return true
  }
  return c === '"'
}
/**
 * @param {string} c  a single character
 * @returns {boolean} true when `c` terminates an unquoted attribute value,
 *                    i.e. it is '>' or whitespace
 */
function isAttribEnd (c) {
  if (c === '>') {
    return true
  }
  return isWhitespace(c)
}
/**
 * @param {RegExp} regex  character class to test against
 * @param {string} c      character to test
 * @returns {boolean} result of `regex.test(c)`
 */
function isMatch (regex, c) {
  var matched = regex.test(c)
  return matched
}
/**
 * Negation of isMatch().
 *
 * @param {RegExp} regex  character class to test against
 * @param {string} c      character to test
 * @returns {boolean}
 */
function notMatch (regex, c) {
  var matched = isMatch(regex, c)
  return !matched
}
// Parser states. While the table is built `S` is a running counter that
// hands out consecutive integers starting at 0; afterwards it is re-pointed
// at sax.STATE and used as a shorthand.
var S = 0
var stateNames = [
  'BEGIN', // leading byte order mark or whitespace
  'BEGIN_WHITESPACE', // leading whitespace
  'TEXT', // general stuff
  'TEXT_ENTITY', // &amp and such.
  'OPEN_WAKA', // <
  'SGML_DECL', // <!BLARG
  'SGML_DECL_QUOTED', // <!BLARG foo "bar
  'DOCTYPE', // <!DOCTYPE
  'DOCTYPE_QUOTED', // <!DOCTYPE "//blah
  'DOCTYPE_DTD', // <!DOCTYPE "//blah" [ ...
  'DOCTYPE_DTD_QUOTED', // <!DOCTYPE "//blah" [ "foo
  'COMMENT_STARTING', // <!-
  'COMMENT', // <!--
  'COMMENT_ENDING', // <!-- blah -
  'COMMENT_ENDED', // <!-- blah --
  'CDATA', // <![CDATA[ something
  'CDATA_ENDING', // ]
  'CDATA_ENDING_2', // ]]
  'PROC_INST', // <?hi
  'PROC_INST_BODY', // <?hi there
  'PROC_INST_ENDING', // <?hi "there" ?
  'OPEN_TAG', // <strong
  'OPEN_TAG_SLASH', // <strong /
  'ATTRIB', // <a
  'ATTRIB_NAME', // <a foo
  'ATTRIB_NAME_SAW_WHITE', // <a foo _
  'ATTRIB_VALUE', // <a foo=
  'ATTRIB_VALUE_QUOTED', // <a foo="bar
  'ATTRIB_VALUE_CLOSED', // <a foo="bar"
  'ATTRIB_VALUE_UNQUOTED', // <a foo=bar
  'ATTRIB_VALUE_ENTITY_Q', // <foo bar="&quot;"
  'ATTRIB_VALUE_ENTITY_U', // <foo bar=&quot
  'CLOSE_TAG', // </a
  'CLOSE_TAG_SAW_WHITE', // </a >
  'SCRIPT', // <script> ...
  'SCRIPT_ENDING' // <script> ... <
]
sax.STATE = {}
stateNames.forEach(function (name) {
  sax.STATE[name] = S++
})
// The five predefined XML entities. This is the only table consulted when
// a parser is created with the `strictEntities` option.
sax.XML_ENTITIES = {
  amp: '&',
  gt: '>',
  lt: '<',
  quot: '"',
  apos: '\''
}
// Named entities available to parsers created without `strictEntities`:
// the five predefined XML entities followed by a larger set of additional
// names (nbsp, copy, euro, ...). Numeric values are character codes; the
// statements following this table replace each of them with the
// corresponding one-character string.
sax.ENTITIES = {
'amp': '&',
'gt': '>',
'lt': '<',
'quot': '"',
'apos': "'",
'AElig': 198,
'Aacute': 193,
'Acirc': 194,
'Agrave': 192,
'Aring': 197,
'Atilde': 195,
'Auml': 196,
'Ccedil': 199,
'ETH': 208,
'Eacute': 201,
'Ecirc': 202,
'Egrave': 200,
'Euml': 203,
'Iacute': 205,
'Icirc': 206,
'Igrave': 204,
'Iuml': 207,
'Ntilde': 209,
'Oacute': 211,
'Ocirc': 212,
'Ograve': 210,
'Oslash': 216,
'Otilde': 213,
'Ouml': 214,
'THORN': 222,
'Uacute': 218,
'Ucirc': 219,
'Ugrave': 217,
'Uuml': 220,
'Yacute': 221,
'aacute': 225,
'acirc': 226,
'aelig': 230,
'agrave': 224,
'aring': 229,
'atilde': 227,
'auml': 228,
'ccedil': 231,
'eacute': 233,
'ecirc': 234,
'egrave': 232,
'eth': 240,
'euml': 235,
'iacute': 237,
'icirc': 238,
'igrave': 236,
'iuml': 239,
'ntilde': 241,
'oacute': 243,
'ocirc': 244,
'ograve': 242,
'oslash': 248,
'otilde': 245,
'ouml': 246,
'szlig': 223,
'thorn': 254,
'uacute': 250,
'ucirc': 251,
'ugrave': 249,
'uuml': 252,
'yacute': 253,
'yuml': 255,
'copy': 169,
'reg': 174,
'nbsp': 160,
'iexcl': 161,
'cent': 162,
'pound': 163,
'curren': 164,
'yen': 165,
'brvbar': 166,
'sect': 167,
'uml': 168,
'ordf': 170,
'laquo': 171,
'not': 172,
'shy': 173,
'macr': 175,
'deg': 176,
'plusmn': 177,
'sup1': 185,
'sup2': 178,
'sup3': 179,
'acute': 180,
'micro': 181,
'para': 182,
'middot': 183,
'cedil': 184,
'ordm': 186,
'raquo': 187,
'frac14': 188,
'frac12': 189,
'frac34': 190,
'iquest': 191,
'times': 215,
'divide': 247,
'OElig': 338,
'oelig': 339,
'Scaron': 352,
'scaron': 353,
'Yuml': 376,
'fnof': 402,
'circ': 710,
'tilde': 732,
'Alpha': 913,
'Beta': 914,
'Gamma': 915,
'Delta': 916,
'Epsilon': 917,
'Zeta': 918,
'Eta': 919,
'Theta': 920,
'Iota': 921,
'Kappa': 922,
'Lambda': 923,
'Mu': 924,
'Nu': 925,
'Xi': 926,
'Omicron': 927,
'Pi': 928,
'Rho': 929,
'Sigma': 931,
'Tau': 932,
'Upsilon': 933,
'Phi': 934,
'Chi': 935,
'Psi': 936,
'Omega': 937,
'alpha': 945,
'beta': 946,
'gamma': 947,
'delta': 948,
'epsilon': 949,
'zeta': 950,
'eta': 951,
'theta': 952,
'iota': 953,
'kappa': 954,
'lambda': 955,
'mu': 956,
'nu': 957,
'xi': 958,
'omicron': 959,
'pi': 960,
'rho': 961,
'sigmaf': 962,
'sigma': 963,
'tau': 964,
'upsilon': 965,
'phi': 966,
'chi': 967,
'psi': 968,
'omega': 969,
'thetasym': 977,
'upsih': 978,
'piv': 982,
'ensp': 8194,
'emsp': 8195,
'thinsp': 8201,
'zwnj': 8204,
'zwj': 8205,
'lrm': 8206,
'rlm': 8207,
'ndash': 8211,
'mdash': 8212,
'lsquo': 8216,
'rsquo': 8217,
'sbquo': 8218,
'ldquo': 8220,
'rdquo': 8221,
'bdquo': 8222,
'dagger': 8224,
'Dagger': 8225,
'bull': 8226,
'hellip': 8230,
'permil': 8240,
'prime': 8242,
'Prime': 8243,
'lsaquo': 8249,
'rsaquo': 8250,
'oline': 8254,
'frasl': 8260,
'euro': 8364,
'image': 8465,
'weierp': 8472,
'real': 8476,
'trade': 8482,
'alefsym': 8501,
'larr': 8592,
'uarr': 8593,
'rarr': 8594,
'darr': 8595,
'harr': 8596,
'crarr': 8629,
'lArr': 8656,
'uArr': 8657,
'rArr': 8658,
'dArr': 8659,
'hArr': 8660,
'forall': 8704,
'part': 8706,
'exist': 8707,
'empty': 8709,
'nabla': 8711,
'isin': 8712,
'notin': 8713,
'ni': 8715,
'prod': 8719,
'sum': 8721,
'minus': 8722,
'lowast': 8727,
'radic': 8730,
'prop': 8733,
'infin': 8734,
'ang': 8736,
'and': 8743,
'or': 8744,
'cap': 8745,
'cup': 8746,
'int': 8747,
'there4': 8756,
'sim': 8764,
'cong': 8773,
'asymp': 8776,
'ne': 8800,
'equiv': 8801,
'le': 8804,
'ge': 8805,
'sub': 8834,
'sup': 8835,
'nsub': 8836,
'sube': 8838,
'supe': 8839,
'oplus': 8853,
'otimes': 8855,
'perp': 8869,
'sdot': 8901,
'lceil': 8968,
'rceil': 8969,
'lfloor': 8970,
'rfloor': 8971,
'lang': 9001,
'rang': 9002,
'loz': 9674,
'spades': 9824,
'clubs': 9827,
'hearts': 9829,
'diams': 9830
}
// Replace numeric character codes in the entity table with the
// one-character strings they stand for; string values are kept as-is.
Object.keys(sax.ENTITIES).forEach(function (name) {
  var value = sax.ENTITIES[name]
  sax.ENTITIES[name] = typeof value === 'number'
    ? String.fromCharCode(value)
    : value
})

// Add the reverse mapping (state number -> state name) to the state table.
Object.keys(sax.STATE).forEach(function (name) {
  sax.STATE[sax.STATE[name]] = name
})

// shorthand
S = sax.STATE
/**
 * Calls the handler stored on `parser` under `event`, if one is set.
 *
 * @param {Object} parser  object carrying the handler properties
 * @param {string} event   handler property name, e.g. 'ontext'
 * @param {*} [data]       single argument passed to the handler
 */
function emit (parser, event, data) {
  if (parser[event]) {
    parser[event](data)
  }
}
/**
 * Emits a node event, flushing any pending text first so that the text
 * event is delivered before the node that follows it.
 *
 * @param {SAXParser} parser
 * @param {string} nodeType  handler property name, e.g. 'onopentag'
 * @param {*} [data]         payload for the handler
 */
function emitNode (parser, nodeType, data) {
  var hasPendingText = !!parser.textNode
  if (hasPendingText) {
    closeText(parser)
  }
  emit(parser, nodeType, data)
}
/**
 * Applies the trim/normalize options to the buffered text, emits it as an
 * 'ontext' event unless it ended up empty, and clears the buffer.
 *
 * @param {SAXParser} parser
 */
function closeText (parser) {
  var text = textopts(parser.opt, parser.textNode)
  // the processed text stays visible on the parser while the handler runs
  parser.textNode = text
  if (text) {
    emit(parser, 'ontext', parser.textNode)
  }
  parser.textNode = ''
}
/**
 * Applies the text-related options to a string.
 *
 * @param {Object} opt   options; `trim` strips surrounding whitespace,
 *                       `normalize` collapses whitespace runs to one space
 * @param {string} text  text to process
 * @returns {string} the processed text
 */
function textopts (opt, text) {
  var result = text
  if (opt.trim) {
    result = result.trim()
  }
  if (opt.normalize) {
    result = result.replace(/\s+/g, ' ')
  }
  return result
}
/**
 * Records and reports a parse error. Pending text is flushed first; when
 * position tracking is on, line, column and current character are appended
 * to the message. The Error is stored in `parser.error` and passed to the
 * 'onerror' handler.
 *
 * @param {SAXParser} parser
 * @param {string} er  error message
 * @returns {SAXParser} the parser
 */
function error (parser, er) {
  closeText(parser)

  var message = er
  if (parser.trackPosition) {
    message += '\nLine: ' + parser.line +
      '\nColumn: ' + parser.column +
      '\nChar: ' + parser.c
  }

  var failure = new Error(message)
  parser.error = failure
  emit(parser, 'onerror', failure)
  return parser
}
/**
 * Finishes the current document: reports an unclosed root tag (strict mode)
 * and an unexpected end when input stops in the middle of a construct,
 * flushes pending text, emits 'onend' and finally re-initialises the parser
 * with the same strictness and options.
 *
 * @param {SAXParser} parser
 * @returns {SAXParser} the (re-initialised) parser
 */
function end (parser) {
  if (parser.sawRoot && !parser.closedRoot) {
    strictFail(parser, 'Unclosed root tag')
  }

  // input may only stop in one of these states
  var restingStates = [S.BEGIN, S.BEGIN_WHITESPACE, S.TEXT]
  if (restingStates.indexOf(parser.state) === -1) {
    error(parser, 'Unexpected end')
  }

  closeText(parser)
  parser.c = ''
  parser.closed = true
  emit(parser, 'onend')

  // reset every field for the next document
  SAXParser.call(parser, parser.strict, parser.opt)
  return parser
}
/**
 * Reports `message` through error() when the parser is in strict mode and
 * does nothing otherwise.
 *
 * @param {SAXParser} parser
 * @param {string} message
 * @throws {Error} when `parser` is not a SAXParser instance
 */
function strictFail (parser, message) {
  var isParser = typeof parser === 'object' && parser instanceof SAXParser
  if (!isParser) {
    throw new Error('bad call to strictFail')
  }
  if (!parser.strict) {
    return
  }
  error(parser, message)
}
/**
 * Starts a new tag object from the buffered tag name, stores it in
 * `parser.tag` and emits 'onopentagstart'. In non-strict mode the tag name
 * is case-folded first.
 *
 * @param {SAXParser} parser
 */
function newTag (parser) {
  if (!parser.strict) {
    parser.tagName = parser.tagName[parser.looseCase]()
  }

  var tag = { name: parser.tagName, attributes: {} }
  parser.tag = tag

  // Inherit the enclosing scope's namespace bindings; this will be
  // overridden if the tag contains an xmlns="foo" or xmlns:foo="bar".
  if (parser.opt.xmlns) {
    var enclosing = parser.tags[parser.tags.length - 1] || parser
    tag.ns = enclosing.ns
  }

  parser.attribList.length = 0
  emitNode(parser, 'onopentagstart', tag)
}
/**
 * Splits a qualified name into prefix and local part at the FIRST colon.
 * Anything after that colon, including further colons, belongs to the local
 * part, so no part of the name is ever dropped.
 *
 * @param {string} name          element or attribute name
 * @param {boolean} [attribute]  truthy when `name` is an attribute name; the
 *                               bare attribute `xmlns` is then reported as
 *                               prefix 'xmlns' with an empty local part
 * @returns {{prefix: string, local: string}}
 */
function qname (name, attribute) {
  var i = name.indexOf(':')
  var prefix = i < 0 ? '' : name.slice(0, i)
  var local = i < 0 ? name : name.slice(i + 1)

  // <x "xmlns"="http://foo">
  if (attribute && name === 'xmlns') {
    prefix = 'xmlns'
    local = ''
  }

  return { prefix: prefix, local: local }
}
/**
 * Commits the attribute currently held in `parser.attribName` /
 * `parser.attribValue` to the tag being opened, then clears both buffers.
 *
 * - A repeated attribute name on the same tag is ignored (first one wins).
 * - In xmlns mode the attribute is queued in `parser.attribList` and its
 *   event is deferred to openTag(); `xmlns` / `xmlns:*` attributes also
 *   update the tag's namespace bindings here.
 * - Otherwise the attribute is stored on the tag and 'onattribute' is
 *   emitted immediately.
 *
 * @param {SAXParser} parser
 */
function attrib (parser) {
  if (!parser.strict) {
    parser.attribName = parser.attribName[parser.looseCase]()
  }

  // Duplicate detection. attribList holds [name, value] pairs, so the names
  // have to be compared explicitly. hasOwnProperty is taken from
  // Object.prototype because an attribute may itself be called
  // "hasOwnProperty" and would shadow the method on the attributes object.
  var seen = Object.prototype.hasOwnProperty.call(
    parser.tag.attributes, parser.attribName)
  for (var k = 0; !seen && k < parser.attribList.length; k++) {
    seen = parser.attribList[k][0] === parser.attribName
  }
  if (seen) {
    parser.attribName = parser.attribValue = ''
    return
  }

  if (parser.opt.xmlns) {
    var qn = qname(parser.attribName, true)
    var prefix = qn.prefix
    var local = qn.local

    if (prefix === 'xmlns') {
      // namespace binding attribute. push the binding into scope
      if (local === 'xml' && parser.attribValue !== XML_NAMESPACE) {
        strictFail(parser,
          'xml: prefix must be bound to ' + XML_NAMESPACE + '\n' +
          'Actual: ' + parser.attribValue)
      } else if (local === 'xmlns' && parser.attribValue !== XMLNS_NAMESPACE) {
        strictFail(parser,
          'xmlns: prefix must be bound to ' + XMLNS_NAMESPACE + '\n' +
          'Actual: ' + parser.attribValue)
      } else {
        var tag = parser.tag
        var parent = parser.tags[parser.tags.length - 1] || parser
        // first binding on this tag: fork a new scope off the parent's
        if (tag.ns === parent.ns) {
          tag.ns = Object.create(parent.ns)
        }
        tag.ns[local] = parser.attribValue
      }
    }

    // defer onattribute events until all attributes have been seen
    // so any new bindings can take effect. preserve attribute order
    // so deferred events can be emitted in document order
    parser.attribList.push([parser.attribName, parser.attribValue])
  } else {
    // in non-xmlns mode, we can emit the event right away
    parser.tag.attributes[parser.attribName] = parser.attribValue
    emitNode(parser, 'onattribute', {
      name: parser.attribName,
      value: parser.attribValue
    })
  }

  parser.attribName = parser.attribValue = ''
}
/**
 * Completes the tag in `parser.tag`: resolves namespaces and emits the
 * deferred attribute events (xmlns mode only), pushes the tag on the stack
 * and emits 'onopentag'. For a non-self-closing tag the parser then moves
 * to the TEXT state, or to SCRIPT for a <script> tag when script handling
 * is enabled.
 *
 * @param {SAXParser} parser
 * @param {boolean} [selfClosing]  truthy for <tag/>; the caller is then
 *                                 expected to call closeTag() right away
 */
function openTag (parser, selfClosing) {
  if (parser.opt.xmlns) {
    // emit namespace binding events
    var tag = parser.tag

    // add namespace info to tag
    var qn = qname(parser.tagName)
    tag.prefix = qn.prefix
    tag.local = qn.local
    // Bindings are always strings. The scope objects ultimately inherit
    // from Object.prototype, so a prefix such as "constructor" would
    // otherwise look bound (to a function).
    var tagBinding = tag.ns[qn.prefix]
    tag.uri = typeof tagBinding === 'string' ? tagBinding : ''

    if (tag.prefix && !tag.uri) {
      strictFail(parser, 'Unbound namespace prefix: ' +
        JSON.stringify(parser.tagName))
      tag.uri = qn.prefix
    }

    var parent = parser.tags[parser.tags.length - 1] || parser
    if (tag.ns && parent.ns !== tag.ns) {
      // this tag introduced bindings of its own
      Object.keys(tag.ns).forEach(function (p) {
        emitNode(parser, 'onopennamespace', {
          prefix: p,
          uri: tag.ns[p]
        })
      })
    }

    // handle deferred onattribute events
    // Note: do not apply default ns to attributes:
    // http://www.w3.org/TR/REC-xml-names/#defaulting
    for (var i = 0, l = parser.attribList.length; i < l; i++) {
      var nv = parser.attribList[i]
      var name = nv[0]
      var value = nv[1]
      var qualName = qname(name, true)
      var prefix = qualName.prefix
      var local = qualName.local
      var binding = prefix === '' ? '' : tag.ns[prefix]
      var uri = typeof binding === 'string' ? binding : ''
      var a = {
        name: name,
        value: value,
        prefix: prefix,
        local: local,
        uri: uri
      }

      // if there's any attributes with an undefined namespace,
      // then fail on them now.
      if (prefix && prefix !== 'xmlns' && !uri) {
        strictFail(parser, 'Unbound namespace prefix: ' +
          JSON.stringify(prefix))
        a.uri = prefix
      }
      parser.tag.attributes[name] = a
      emitNode(parser, 'onattribute', a)
    }
    parser.attribList.length = 0
  }

  parser.tag.isSelfClosing = !!selfClosing

  // process the tag
  parser.sawRoot = true
  parser.tags.push(parser.tag)
  emitNode(parser, 'onopentag', parser.tag)
  if (!selfClosing) {
    // special case for <script> in non-strict mode.
    if (!parser.noscript && parser.tagName.toLowerCase() === 'script') {
      parser.state = S.SCRIPT
    } else {
      parser.state = S.TEXT
    }
    parser.tag = null
    parser.tagName = ''
  }
  parser.attribName = parser.attribValue = ''
  parser.attribList.length = 0
}
/**
 * Handles a complete closing tag whose name is in `parser.tagName`.
 *
 * - An empty name ("</>") is put back into the text.
 * - While script content is buffered, any closing tag other than </script>
 *   (compared case-insensitively, like the opening tag) is script content.
 * - Otherwise the tag stack is searched from the top for a matching open
 *   tag. If one is found, it and every tag above it are closed; if none is
 *   found, the closing tag is put back into the text.
 *
 * @param {SAXParser} parser
 */
function closeTag (parser) {
  if (!parser.tagName) {
    strictFail(parser, 'Weird empty close tag.')
    parser.textNode += '</>'
    parser.state = S.TEXT
    return
  }

  if (parser.script) {
    // openTag() recognises <script> regardless of case, so the end tag
    // must be recognised the same way or <SCRIPT> could never be closed.
    if (parser.tagName.toLowerCase() !== 'script') {
      parser.script += '</' + parser.tagName + '>'
      parser.tagName = ''
      parser.state = S.SCRIPT
      return
    }
    emitNode(parser, 'onscript', parser.script)
    parser.script = ''
  }

  // first make sure that the closing tag actually exists.
  // <a><b></c></b></a> will close everything, otherwise.
  var t = parser.tags.length
  var tagName = parser.tagName
  if (!parser.strict) {
    tagName = tagName[parser.looseCase]()
  }
  var closeTo = tagName
  while (t--) {
    var close = parser.tags[t]
    if (close.name !== closeTo) {
      // fail the first time in strict mode
      strictFail(parser, 'Unexpected close tag')
    } else {
      break
    }
  }

  // didn't find it. we already failed for strict, so just abort.
  if (t < 0) {
    strictFail(parser, 'Unmatched closing tag: ' + parser.tagName)
    parser.textNode += '</' + parser.tagName + '>'
    parser.state = S.TEXT
    return
  }
  parser.tagName = tagName

  // close the matched tag and everything that was opened after it
  var s = parser.tags.length
  while (s-- > t) {
    var tag = parser.tag = parser.tags.pop()
    parser.tagName = parser.tag.name
    emitNode(parser, 'onclosetag', parser.tagName)

    var parent = parser.tags[parser.tags.length - 1] || parser
    if (parser.opt.xmlns && tag.ns !== parent.ns) {
      // remove namespace bindings introduced by tag
      Object.keys(tag.ns).forEach(function (p) {
        var n = tag.ns[p]
        emitNode(parser, 'onclosenamespace', { prefix: p, uri: n })
      })
    }
  }
  if (t === 0) parser.closedRoot = true
  parser.tagName = parser.attribValue = parser.attribName = ''
  parser.attribList.length = 0
  parser.state = S.TEXT
}
/**
 * Resolves the entity name buffered in `parser.entity` (without the
 * surrounding "&" and ";").
 *
 * Named entities are looked up in `parser.ENTITIES`, first verbatim and
 * then lower-cased. Numeric references ("#65", "#x41") are converted with
 * String.fromCodePoint. Anything that cannot be resolved is reported via
 * strictFail() and returned unchanged as "&name;".
 *
 * @param {SAXParser} parser
 * @returns {string} replacement text for the entity
 */
function parseEntity (parser) {
  var entity = parser.entity
  var entityLC = entity.toLowerCase()
  var num
  var numStr = ''

  // Only string/number values count as definitions. The entity table
  // inherits from Object.prototype, so without this check names such as
  // "constructor" or "toString" would resolve to built-in functions.
  function named (key) {
    var value = parser.ENTITIES[key]
    var kind = typeof value
    return (kind === 'string' || kind === 'number') && value ? value : ''
  }

  var replacement = named(entity) || named(entityLC)
  if (replacement) {
    return replacement
  }

  entity = entityLC
  if (entity.charAt(0) === '#') {
    if (entity.charAt(1) === 'x') {
      entity = entity.slice(2)
      num = parseInt(entity, 16)
      numStr = num.toString(16)
    } else {
      entity = entity.slice(1)
      num = parseInt(entity, 10)
      numStr = num.toString(10)
    }
  }

  // Round-trip check: after dropping leading zeros the digits must print
  // back exactly, which rejects trailing garbage such as "#12abc".
  entity = entity.replace(/^0+/, '')
  var malformed = isNaN(num) || numStr.toLowerCase() !== entity
  // String.fromCodePoint throws a RangeError outside 0..0x10FFFF; treat
  // such references as invalid instead of letting the exception escape.
  var outOfRange = num < 0 || num > 0x10FFFF
  if (malformed || outOfRange) {
    strictFail(parser, 'Invalid character entity')
    return '&' + parser.entity + ';'
  }

  return String.fromCodePoint(num)
}
/**
 * Handles one character before the first tag of the document: '<' starts a
 * tag, whitespace is skipped, and anything else is reported (strict mode)
 * and becomes the start of a text node.
 *
 * @param {SAXParser} parser
 * @param {string} c  current character
 */
function beginWhiteSpace (parser, c) {
  if (c === '<') {
    parser.state = S.OPEN_WAKA
    parser.startTagPosition = parser.position
    return
  }
  if (isWhitespace(c)) {
    return
  }
  // have to process this as a text node.
  // weird, but happens.
  strictFail(parser, 'Non-whitespace before first tag.')
  parser.textNode = c
  parser.state = S.TEXT
}
/**
 * Bounds-checked character access.
 *
 * @param {string} chunk
 * @param {number} i  index
 * @returns {string} the character at `i`, or '' when `i` is not below
 *                   `chunk.length`
 */
function charAt (chunk, i) {
  return i < chunk.length ? chunk.charAt(i) : ''
}
/**
 * Feeds a chunk of input to the parser (installed as
 * SAXParser.prototype.write, so `this` is the parser).
 *
 * The chunk is consumed one character at a time by a state machine keyed on
 * `parser.state`; events are emitted as constructs complete. State is kept
 * on the parser between calls, so a document may be split across chunks at
 * any position.
 *
 * @param {string|Object|null} chunk  text to parse; objects are converted
 *                                    with `toString()`; `null` ends the
 *                                    document (same as end())
 * @returns {SAXParser} the parser
 * @throws the stored `parser.error` if a previous error has not been
 *         cleared (see resume()), or an Error if `parser.state` holds an
 *         unknown value
 */
function write (chunk) {
  var parser = this
  if (this.error) {
    throw this.error
  }
  if (parser.closed) {
    return error(parser,
      'Cannot write after close. Assign an onready handler.')
  }
  if (chunk === null) {
    return end(parser)
  }
  if (typeof chunk === 'object') {
    chunk = chunk.toString()
  }
  var i = 0
  var c = ''
  while (true) {
    c = charAt(chunk, i++)
    parser.c = c

    // '' marks the end of the chunk
    if (!c) {
      break
    }

    if (parser.trackPosition) {
      parser.position++
      if (c === '\n') {
        parser.line++
        parser.column = 0
      } else {
        parser.column++
      }
    }

    switch (parser.state) {
      case S.BEGIN:
        parser.state = S.BEGIN_WHITESPACE
        if (c === '\uFEFF') {
          continue
        }
        beginWhiteSpace(parser, c)
        continue

      case S.BEGIN_WHITESPACE:
        beginWhiteSpace(parser, c)
        continue

      case S.TEXT:
        if (parser.sawRoot && !parser.closedRoot) {
          // fast path: take the whole run up to the next '<' or '&' at once
          var starti = i - 1
          while (c && c !== '<' && c !== '&') {
            c = charAt(chunk, i++)
            if (c && parser.trackPosition) {
              parser.position++
              if (c === '\n') {
                parser.line++
                parser.column = 0
              } else {
                parser.column++
              }
            }
          }
          parser.textNode += chunk.substring(starti, i - 1)
        }
        if (c === '<' && !(parser.sawRoot && parser.closedRoot && !parser.strict)) {
          parser.state = S.OPEN_WAKA
          parser.startTagPosition = parser.position
        } else {
          if (!isWhitespace(c) && (!parser.sawRoot || parser.closedRoot)) {
            strictFail(parser, 'Text data outside of root node.')
          }
          if (c === '&') {
            parser.state = S.TEXT_ENTITY
          } else {
            parser.textNode += c
          }
        }
        continue

      case S.SCRIPT:
        // only non-strict
        if (c === '<') {
          parser.state = S.SCRIPT_ENDING
        } else {
          parser.script += c
        }
        continue

      case S.SCRIPT_ENDING:
        if (c === '/') {
          parser.state = S.CLOSE_TAG
        } else {
          parser.script += '<' + c
          parser.state = S.SCRIPT
        }
        continue

      case S.OPEN_WAKA:
        // either a /, ?, !, or text is coming next.
        if (c === '!') {
          parser.state = S.SGML_DECL
          parser.sgmlDecl = ''
        } else if (isWhitespace(c)) {
          // wait for it...
        } else if (isMatch(nameStart, c)) {
          parser.state = S.OPEN_TAG
          parser.tagName = c
        } else if (c === '/') {
          parser.state = S.CLOSE_TAG
          parser.tagName = ''
        } else if (c === '?') {
          parser.state = S.PROC_INST
          parser.procInstName = parser.procInstBody = ''
        } else {
          strictFail(parser, 'Unencoded <')
          // if there was some whitespace, then add that in.
          if (parser.startTagPosition + 1 < parser.position) {
            var pad = parser.position - parser.startTagPosition
            c = new Array(pad).join(' ') + c
          }
          parser.textNode += '<' + c
          parser.state = S.TEXT
        }
        continue

      case S.SGML_DECL:
        if ((parser.sgmlDecl + c).toUpperCase() === CDATA) {
          emitNode(parser, 'onopencdata')
          parser.state = S.CDATA
          parser.sgmlDecl = ''
          parser.cdata = ''
        } else if (parser.sgmlDecl + c === '--') {
          parser.state = S.COMMENT
          parser.comment = ''
          parser.sgmlDecl = ''
        } else if ((parser.sgmlDecl + c).toUpperCase() === DOCTYPE) {
          parser.state = S.DOCTYPE
          if (parser.doctype || parser.sawRoot) {
            strictFail(parser,
              'Inappropriately located doctype declaration')
          }
          parser.doctype = ''
          parser.sgmlDecl = ''
        } else if (c === '>') {
          emitNode(parser, 'onsgmldeclaration', parser.sgmlDecl)
          parser.sgmlDecl = ''
          parser.state = S.TEXT
        } else if (isQuote(c)) {
          parser.state = S.SGML_DECL_QUOTED
          // remember which quote opened the string; SGML_DECL_QUOTED leaves
          // the quoted section only when it sees this character again
          parser.q = c
          parser.sgmlDecl += c
        } else {
          parser.sgmlDecl += c
        }
        continue

      case S.SGML_DECL_QUOTED:
        if (c === parser.q) {
          parser.state = S.SGML_DECL
          parser.q = ''
        }
        parser.sgmlDecl += c
        continue

      case S.DOCTYPE:
        if (c === '>') {
          parser.state = S.TEXT
          emitNode(parser, 'ondoctype', parser.doctype)
          parser.doctype = true // just remember that we saw it.
        } else {
          parser.doctype += c
          if (c === '[') {
            parser.state = S.DOCTYPE_DTD
          } else if (isQuote(c)) {
            parser.state = S.DOCTYPE_QUOTED
            parser.q = c
          }
        }
        continue

      case S.DOCTYPE_QUOTED:
        parser.doctype += c
        if (c === parser.q) {
          parser.q = ''
          parser.state = S.DOCTYPE
        }
        continue

      case S.DOCTYPE_DTD:
        parser.doctype += c
        if (c === ']') {
          parser.state = S.DOCTYPE
        } else if (isQuote(c)) {
          parser.state = S.DOCTYPE_DTD_QUOTED
          parser.q = c
        }
        continue

      case S.DOCTYPE_DTD_QUOTED:
        parser.doctype += c
        if (c === parser.q) {
          parser.state = S.DOCTYPE_DTD
          parser.q = ''
        }
        continue

      case S.COMMENT:
        if (c === '-') {
          parser.state = S.COMMENT_ENDING
        } else {
          parser.comment += c
        }
        continue

      case S.COMMENT_ENDING:
        if (c === '-') {
          parser.state = S.COMMENT_ENDED
          parser.comment = textopts(parser.opt, parser.comment)
          if (parser.comment) {
            emitNode(parser, 'oncomment', parser.comment)
          }
          parser.comment = ''
        } else {
          parser.comment += '-' + c
          parser.state = S.COMMENT
        }
        continue

      case S.COMMENT_ENDED:
        if (c !== '>') {
          strictFail(parser, 'Malformed comment')
          // allow <!-- blah -- bloo --> in non-strict mode,
          // which is a comment of " blah -- bloo "
          parser.comment += '--' + c
          parser.state = S.COMMENT
        } else {
          parser.state = S.TEXT
        }
        continue

      case S.CDATA:
        if (c === ']') {
          parser.state = S.CDATA_ENDING
        } else {
          parser.cdata += c
        }
        continue

      case S.CDATA_ENDING:
        if (c === ']') {
          parser.state = S.CDATA_ENDING_2
        } else {
          parser.cdata += ']' + c
          parser.state = S.CDATA
        }
        continue

      case S.CDATA_ENDING_2:
        if (c === '>') {
          if (parser.cdata) {
            emitNode(parser, 'oncdata', parser.cdata)
          }
          emitNode(parser, 'onclosecdata')
          parser.cdata = ''
          parser.state = S.TEXT
        } else if (c === ']') {
          parser.cdata += ']'
        } else {
          parser.cdata += ']]' + c
          parser.state = S.CDATA
        }
        continue

      case S.PROC_INST:
        if (c === '?') {
          parser.state = S.PROC_INST_ENDING
        } else if (isWhitespace(c)) {
          parser.state = S.PROC_INST_BODY
        } else {
          parser.procInstName += c
        }
        continue

      case S.PROC_INST_BODY:
        if (!parser.procInstBody && isWhitespace(c)) {
          continue
        } else if (c === '?') {
          parser.state = S.PROC_INST_ENDING
        } else {
          parser.procInstBody += c
        }
        continue

      case S.PROC_INST_ENDING:
        if (c === '>') {
          emitNode(parser, 'onprocessinginstruction', {
            name: parser.procInstName,
            body: parser.procInstBody
          })
          parser.procInstName = parser.procInstBody = ''
          parser.state = S.TEXT
        } else {
          parser.procInstBody += '?' + c
          parser.state = S.PROC_INST_BODY
        }
        continue

      case S.OPEN_TAG:
        if (isMatch(nameBody, c)) {
          parser.tagName += c
        } else {
          newTag(parser)
          if (c === '>') {
            openTag(parser)
          } else if (c === '/') {
            parser.state = S.OPEN_TAG_SLASH
          } else {
            if (!isWhitespace(c)) {
              strictFail(parser, 'Invalid character in tag name')
            }
            parser.state = S.ATTRIB
          }
        }
        continue

      case S.OPEN_TAG_SLASH:
        if (c === '>') {
          openTag(parser, true)
          closeTag(parser)
        } else {
          strictFail(parser, 'Forward-slash in opening tag not followed by >')
          parser.state = S.ATTRIB
        }
        continue

      case S.ATTRIB:
        // haven't read the attribute name yet.
        if (isWhitespace(c)) {
          continue
        } else if (c === '>') {
          openTag(parser)
        } else if (c === '/') {
          parser.state = S.OPEN_TAG_SLASH
        } else if (isMatch(nameStart, c)) {
          parser.attribName = c
          parser.attribValue = ''
          parser.state = S.ATTRIB_NAME
        } else {
          strictFail(parser, 'Invalid attribute name')
        }
        continue

      case S.ATTRIB_NAME:
        if (c === '=') {
          parser.state = S.ATTRIB_VALUE
        } else if (c === '>') {
          strictFail(parser, 'Attribute without value')
          parser.attribValue = parser.attribName
          attrib(parser)
          openTag(parser)
        } else if (isWhitespace(c)) {
          parser.state = S.ATTRIB_NAME_SAW_WHITE
        } else if (isMatch(nameBody, c)) {
          parser.attribName += c
        } else {
          strictFail(parser, 'Invalid attribute name')
        }
        continue

      case S.ATTRIB_NAME_SAW_WHITE:
        if (c === '=') {
          parser.state = S.ATTRIB_VALUE
        } else if (isWhitespace(c)) {
          continue
        } else {
          strictFail(parser, 'Attribute without value')
          parser.tag.attributes[parser.attribName] = ''
          parser.attribValue = ''
          emitNode(parser, 'onattribute', {
            name: parser.attribName,
            value: ''
          })
          parser.attribName = ''
          if (c === '>') {
            openTag(parser)
          } else if (isMatch(nameStart, c)) {
            parser.attribName = c
            parser.state = S.ATTRIB_NAME
          } else {
            strictFail(parser, 'Invalid attribute name')
            parser.state = S.ATTRIB
          }
        }
        continue

      case S.ATTRIB_VALUE:
        if (isWhitespace(c)) {
          continue
        } else if (isQuote(c)) {
          parser.q = c
          parser.state = S.ATTRIB_VALUE_QUOTED
        } else {
          strictFail(parser, 'Unquoted attribute value')
          parser.state = S.ATTRIB_VALUE_UNQUOTED
          parser.attribValue = c
        }
        continue

      case S.ATTRIB_VALUE_QUOTED:
        if (c !== parser.q) {
          if (c === '&') {
            parser.state = S.ATTRIB_VALUE_ENTITY_Q
          } else {
            parser.attribValue += c
          }
          continue
        }
        attrib(parser)
        parser.q = ''
        parser.state = S.ATTRIB_VALUE_CLOSED
        continue

      case S.ATTRIB_VALUE_CLOSED:
        if (isWhitespace(c)) {
          parser.state = S.ATTRIB
        } else if (c === '>') {
          openTag(parser)
        } else if (c === '/') {
          parser.state = S.OPEN_TAG_SLASH
        } else if (isMatch(nameStart, c)) {
          strictFail(parser, 'No whitespace between attributes')
          parser.attribName = c
          parser.attribValue = ''
          parser.state = S.ATTRIB_NAME
        } else {
          strictFail(parser, 'Invalid attribute name')
        }
        continue

      case S.ATTRIB_VALUE_UNQUOTED:
        if (!isAttribEnd(c)) {
          if (c === '&') {
            parser.state = S.ATTRIB_VALUE_ENTITY_U
          } else {
            parser.attribValue += c
          }
          continue
        }
        attrib(parser)
        if (c === '>') {
          openTag(parser)
        } else {
          parser.state = S.ATTRIB
        }
        continue

      case S.CLOSE_TAG:
        if (!parser.tagName) {
          if (isWhitespace(c)) {
            continue
          } else if (notMatch(nameStart, c)) {
            if (parser.script) {
              parser.script += '</' + c
              parser.state = S.SCRIPT
            } else {
              strictFail(parser, 'Invalid tagname in closing tag.')
            }
          } else {
            parser.tagName = c
          }
        } else if (c === '>') {
          closeTag(parser)
        } else if (isMatch(nameBody, c)) {
          parser.tagName += c
        } else if (parser.script) {
          parser.script += '</' + parser.tagName
          parser.tagName = ''
          parser.state = S.SCRIPT
        } else {
          if (!isWhitespace(c)) {
            strictFail(parser, 'Invalid tagname in closing tag')
          }
          parser.state = S.CLOSE_TAG_SAW_WHITE
        }
        continue

      case S.CLOSE_TAG_SAW_WHITE:
        if (isWhitespace(c)) {
          continue
        }
        if (c === '>') {
          closeTag(parser)
        } else {
          strictFail(parser, 'Invalid characters in closing tag')
        }
        continue

      case S.TEXT_ENTITY:
      case S.ATTRIB_VALUE_ENTITY_Q:
      case S.ATTRIB_VALUE_ENTITY_U:
        // The three entity states share one implementation; they differ
        // only in the buffer that receives the result and in the state to
        // return to afterwards.
        var returnState
        var buffer
        switch (parser.state) {
          case S.TEXT_ENTITY:
            returnState = S.TEXT
            buffer = 'textNode'
            break

          case S.ATTRIB_VALUE_ENTITY_Q:
            returnState = S.ATTRIB_VALUE_QUOTED
            buffer = 'attribValue'
            break

          case S.ATTRIB_VALUE_ENTITY_U:
            returnState = S.ATTRIB_VALUE_UNQUOTED
            buffer = 'attribValue'
            break
        }

        if (c === ';') {
          parser[buffer] += parseEntity(parser)
          parser.entity = ''
          parser.state = returnState
        } else if (isMatch(parser.entity.length ? entityBody : entityStart, c)) {
          parser.entity += c
        } else {
          strictFail(parser, 'Invalid character in entity name')
          parser[buffer] += '&' + parser.entity + c
          parser.entity = ''
          parser.state = returnState
        }

        continue

      default:
        throw new Error('Unknown state: ' + parser.state)
    }
  } // while

  // buffer sizes are checked at most once per write()
  if (parser.position >= parser.bufferCheckPosition) {
    checkBufferLength(parser)
  }
  return parser
}
/*! http://mths.be/fromcodepoint v0.1.0 by @mathias */
// Vendored fallback for String.fromCodePoint, installed only when the
// engine does not provide one. parseEntity() relies on it to turn numeric
// character references into strings.
/* istanbul ignore next */
if (!String.fromCodePoint) {
(function () {
var stringFromCharCode = String.fromCharCode
var floor = Math.floor
// Accepts any number of code points and returns the string they spell.
// Throws a RangeError for a value that is not an integer in 0..0x10FFFF.
var fromCodePoint = function () {
// code units are converted in batches once more than MAX_SIZE have
// accumulated, which bounds the argument count passed to apply() below
var MAX_SIZE = 0x4000
var codeUnits = []
var highSurrogate
var lowSurrogate
var index = -1
var length = arguments.length
if (!length) {
return ''
}
var result = ''
while (++index < length) {
var codePoint = Number(arguments[index])
if (
!isFinite(codePoint) || // `NaN`, `+Infinity`, or `-Infinity`
codePoint < 0 || // not a valid Unicode code point
codePoint > 0x10FFFF || // not a valid Unicode code point
floor(codePoint) !== codePoint // not an integer
) {
throw RangeError('Invalid code point: ' + codePoint)
}
if (codePoint <= 0xFFFF) { // BMP code point
codeUnits.push(codePoint)
} else { // Astral code point; split in surrogate halves
// http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
codePoint -= 0x10000
highSurrogate = (codePoint >> 10) + 0xD800
lowSurrogate = (codePoint % 0x400) + 0xDC00
codeUnits.push(highSurrogate, lowSurrogate)
}
// flush on the last argument or when the batch is full
if (index + 1 === length || codeUnits.length > MAX_SIZE) {
result += stringFromCharCode.apply(null, codeUnits)
codeUnits.length = 0
}
}
return result
}
// Install as a writable, configurable property (non-enumerable, since
// `enumerable` is not set) when defineProperty exists; otherwise fall back
// to plain assignment.
/* istanbul ignore next */
if (Object.defineProperty) {
Object.defineProperty(String, 'fromCodePoint', {
value: fromCodePoint,
configurable: true,
writable: true
})
} else {
String.fromCodePoint = fromCodePoint
}
}())
}
})(typeof exports === 'undefined' ? this.sax = {} : exports)