all da files

This commit is contained in:
jllord
2013-05-27 13:45:59 -07:00
commit 59d3d30afa
6704 changed files with 1954956 additions and 0 deletions

View File

@@ -0,0 +1,4 @@
support
test
examples
*.sock

View File

@@ -0,0 +1,5 @@
language: node_js
node_js:
- 0.4
- 0.6
- 0.7

View File

@@ -0,0 +1,18 @@
0.0.3 / 2012-05-29
==================
* compatible with node 0.4.x
* added travis support
0.0.2 / 2012-05-27
==================
* Now supports node 0.7.x
* Commented out tests for features that will not be supported
* Down to 19/156 failed tests - thanks to @FB55!
0.0.1 / 2012-05-23
==================
* Initial release

View File

@@ -0,0 +1,7 @@
test:
@./node_modules/mocha/bin/mocha --reporter list
subl:
@subl lib/ test/ package.json index.js
.PHONY: test subl

View File

@@ -0,0 +1,51 @@
# cheerio-select [![Build Status](https://secure.travis-ci.org/MatthewMueller/cheerio-select.png?branch=master)](http://travis-ci.org/MatthewMueller/cheerio-select)
Tiny wrapper around FB55's excellent [CSSselect](https://github.com/FB55/CSSselect) library.
cheerio-select provides a comprehensive test suite based on sizzle's test suite.
> Warning: Currently, not all tests pass, and some sizzle features will not be supported
## Usage
var select = require('cheerio-select'),
parse = require('cheerio').parse,
dom = parse('<ul id = "fruits"><li class = "apple">Apple</li></ul>');
select('#fruits > .apple', dom);
=> [{...}]
## TODO
* Get all the unit tests to pass!
## Run tests
npm install
make test
## License
(The MIT License)
Copyright (c) 2012 Matt Mueller &lt;mattmuelle@gmail.com&gt;
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
'Software'), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View File

@@ -0,0 +1,9 @@
exports = module.exports = require('./lib/select');
/*
Export the version
*/
exports.version = (function() {
var pkg = require('fs').readFileSync(__dirname + '/package.json', 'utf8');
return JSON.parse(pkg).version;
})();

View File

@@ -0,0 +1,37 @@
/*
* Module dependencies
*/
var CSSselect = require('CSSselect'),
isArray = Array.isArray;
/*
 * Select function
 *
 * Normalizes `dom` into a flat array of nodes and hands it, together
 * with `query`, to CSSselect.iterate. Returns whatever iterate returns.
 */

exports = module.exports = function(query, dom) {
  var nodes = normalize(dom);
  return CSSselect.iterate(query, nodes);
};
/*
 * Normalize the dom
 *
 * Accepts an object with a truthy `cheerio` property (converted with
 * toArray()), an array of nodes or a single node, and returns a new array
 * in which every node of type 'root' is replaced by its children.
 */

var normalize = exports.normalize = function(dom) {
  var nodes = dom;
  if(nodes.cheerio) nodes = nodes.toArray();
  if(!isArray(nodes)) nodes = [nodes];

  var out = [];
  for(var i = 0, len = nodes.length; i < len; i++) {
    var node = nodes[i];
    if(node.type !== 'root') {
      out.push(node);
      continue;
    }
    // a root node is only a container: splice its children in
    out = out.concat(node.children || []);
  }

  return out;
};

View File

@@ -0,0 +1,8 @@
language: node_js
node_js:
- 0.4
- 0.6
- 0.8
- 0.9
notifications:
email: false

View File

@@ -0,0 +1,11 @@
Copyright (c) Felix Böhm
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
THIS IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS,
EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@@ -0,0 +1,20 @@
#CSSselect [![Build Status](https://secure.travis-ci.org/fb55/CSSselect.png?branch=master)](http://travis-ci.org/fb55/CSSselect)
##What?
CSSselect is a CSS selector engine. It returns a function that tests whether elements match a selector - checking needs to happen "from the top", like browser engines execute queries.
##Why?
Just take the following CSS query: `foo bar baz`. When the element named `baz` has like a billion children, every one of them needs to be checked if they match a query. Three times, to be precise, if you run a CSS query from the start to the end (as e.g. JSDOM does). Yup, that's slow.
This library checks every element once. The more complex the query, the greater the benefit.
##How?
By stacking functions!
##TODO
1. The API needs to be improved
2. Documentation needs to be written

View File

@@ -0,0 +1,35 @@
/*
  DOM helper functions implemented on top of browser DOM nodes
  (nodeType, childNodes, parentElement, attributes, tagName)
*/

//true for element nodes (nodeType 1)
function isElement(elem){
  return elem.nodeType === 1;
}

//all child nodes of elem's parent element (elem included),
//or a falsy value when elem has no parent element
function getSiblings(elem){
  var parent = getParent(elem);
  return parent && getChildren(parent);
}

function getChildren(elem){
  return elem.childNodes;
}

function getParent(elem){
  return elem.parentElement;
}

//throws when the attribute is missing; check hasAttrib first
function getAttributeValue(elem, name){
  return elem.attributes[name].value;
}

function hasAttrib(elem, name){
  return name in elem.attributes;
}

//lower-cased tag name
function getName(elem){
  return elem.tagName.toLowerCase();
}

//https://github.com/ded/qwery/blob/master/pseudos/qwery-pseudos.js#L47-54
//concatenated text of all children of elem ("" when there are none)
function getText(elem) {
  var str = "",
      childs = getChildren(elem);

  if(!childs) return str;

  for(var i = 0; i < childs.length; i++){
    var child = childs[i];
    if(isElement(child)){
      //prefer the native text accessors, fall back to walking the subtree
      str += child.textContent || child.innerText || getText(child);
    } else if(child.nodeType === 3 || child.nodeType === 4){
      //text (3) and CDATA (4) nodes carry their content in nodeValue
      str += child.nodeValue;
    }
  }

  return str;
}

View File

@@ -0,0 +1,736 @@
;(function(global, CSSwhat){
"use strict";
//functions that make porting the library to another DOM easy
//a node counts as an element when its type is "tag", "style" or "script"
function isElement(elem){
  switch(elem.type){
    case "tag":
    case "style":
    case "script":
      return true;
    default:
      return false;
  }
}
//returns the node's `children` property (undefined when the node has none)
function getChildren(elem){
  var children = elem.children;
  return children;
}
//returns the node's `parent` property (falsy for nodes without a parent)
function getParent(elem){
  var parent = elem.parent;
  return parent;
}
//looks `name` up in the node's `attribs` map; throws when the node has no
//`attribs` at all, so callers check hasAttrib first
function getAttributeValue(elem, name){
  var attribs = elem.attribs;
  return attribs[name];
}
//truthy when the node has an `attribs` map that contains `name`;
//for nodes without `attribs` the falsy `attribs` value itself is returned
function hasAttrib(elem, name){
  var attribs = elem.attribs;
  if(!attribs) return attribs;
  return name in attribs;
}
//returns the node's `name` property (undefined for nodes that have none)
function getName(elem){
  var name = elem.name;
  return name;
}
//concatenates the text below elem: elements are walked recursively,
//every other child contributes its `data` property
function getText(elem){
  var childs = getChildren(elem);
  if(!childs) return "";

  var text = "";
  var total = childs.length;
  var index = 0;
  while(index < total){
    var child = childs[index];
    text += isElement(child) ? getText(child) : child.data;
    index++;
  }
  return text;
}
/*
pseudo selectors
---
they are available in two forms:
* filters called when the selector
is compiled and return a function
that needs to return next()
* pseudos get called on execution
they need to return a boolean
*/
/*
  Every filter receives the already compiled rest of the chain (`next`) and,
  where applicable, the text between the parentheses of the pseudo-class.
  It returns a function(elem) that yields next(elem) when the element passes
  and a falsy value otherwise.
*/
var filters = {
  //:not(selector) - passes when the sub-selector does NOT match
  not: function(next, select){
    var func = parse(select);

    if(func === falseFunc){
      //the sub-selector never matches, so nothing is filtered out
      if(next === rootFunc) return trueFunc;
      else return next;
    }
    if(func === trueFunc) return falseFunc;
    if(func === rootFunc) return falseFunc;

    return function(elem){
      if(!func(elem)) return next(elem);
    };
  },
  //:contains(text) - passes when the element's text contains `text`;
  //one pair of matching surrounding quotes is stripped from the argument
  contains: function(next, text){
    if(
      (text.charAt(0) === "\"" || text.charAt(0) === "'") &&
      text.charAt(0) === text.substr(-1)
    ){
      text = text.slice(1, -1);
    }
    return function(elem){
      if(getText(elem).indexOf(text) !== -1) return next(elem);
    };
  },
  //:has(selector) - passes when any descendant element matches the sub-selector
  has: function(next, select){
    var func = parse(select);
    if(func === rootFunc || func === trueFunc) return next;
    if(func === falseFunc) return falseFunc;

    //depth-first search for a matching descendant; kept under its own name so
    //the returned filter below cannot shadow it and end up calling itself
    var hasMatchingDescendant = function(elem){
      var children = getChildren(elem);
      if(!children) return;
      for(var i = 0, j = children.length; i < j; i++){
        if(!isElement(children[i])) continue;
        if(func(children[i])) return true;
        if(hasMatchingDescendant(children[i])) return true;
      }
    };

    return function(elem){
      if(hasMatchingDescendant(elem)) return next(elem);
    };
  },
  //:root - passes for elements without a parent
  root: function(next){
    return function(elem){
      if(!getParent(elem)) return next(elem);
    };
  },
  //:empty - passes for elements without any child nodes
  empty: function(next){
    return function(elem){
      var children = getChildren(elem);
      if(!children || children.length === 0) return next(elem);
    };
  },
  parent: function(next){ //:parent is the inverse of :empty
    return function(elem){
      var children = getChildren(elem);
      if(children && children.length !== 0) return next(elem);
    };
  },

  //location specific methods
  //first- and last-child methods return as soon as they find another element
  "first-child": function(next){
    return function(elem){
      if(getFirstElement(getSiblings(elem)) === elem) return next(elem);
    };
  },
  "last-child": function(next){
    return function(elem){
      var siblings = getSiblings(elem);
      if(!siblings) return;

      //walk backwards: elem has to show up before any other element does
      for(var i = siblings.length-1; i >= 0; i--){
        if(siblings[i] === elem) return next(elem);
        if(isElement(siblings[i])) return;
      }
    };
  },
  "first-of-type": function(next){
    return function(elem){
      var siblings = getSiblings(elem);
      if(!siblings) return;

      for(var i = 0, j = siblings.length; i < j; i++){
        if(siblings[i] === elem) return next(elem);
        if(getName(siblings[i]) === getName(elem)) return;
      }
    };
  },
  "last-of-type": function(next){
    return function(elem){
      var siblings = getSiblings(elem);
      if(!siblings) return;

      for(var i = siblings.length-1; i >= 0; i--){
        if(siblings[i] === elem) return next(elem);
        if(getName(siblings[i]) === getName(elem)) return;
      }
    };
  },
  "only-of-type": function(next){
    return function(elem){
      var siblings = getSiblings(elem);
      if(!siblings) return;

      for(var i = 0, j = siblings.length; i < j; i++){
        if(siblings[i] === elem) continue;
        if(getName(siblings[i]) === getName(elem)) return;
      }

      return next(elem);
    };
  },
  "only-child": function(next){
    return function(elem){
      var siblings = getSiblings(elem);
      if(!siblings) return;
      if(siblings.length === 1) return next(elem);

      for(var i = 0, j = siblings.length; i < j; i++){
        if(isElement(siblings[i]) && siblings[i] !== elem) return;
      }

      return next(elem);
    };
  },
  //the nth-* filters compile their an+b rule once via getNCheck and then
  //feed it the zero-based position of the element
  "nth-child": function(next, rule){
    var func = getNCheck(rule);

    if(func === falseFunc) return func;
    if(func === trueFunc){
      if(next === rootFunc) return func;
      else return next;
    }

    return function(elem){
      if(func(getIndex(elem))) return next(elem);
    };
  },
  "nth-last-child": function(next, rule){
    var func = getNCheck(rule);

    if(func === falseFunc) return func;
    if(func === trueFunc){
      if(next === rootFunc) return func;
      else return next;
    }

    return function(elem){
      var siblings = getSiblings(elem);
      if(!siblings) return;

      //pos counts the elements that follow elem
      for(var pos = 0, i = siblings.length - 1; i >= 0; i--){
        if(siblings[i] === elem){
          if(func(pos)) return next(elem);
          return;
        }
        if(isElement(siblings[i])) pos++;
      }
    };
  },
  "nth-of-type": function(next, rule){
    var func = getNCheck(rule);

    if(func === falseFunc) return func;
    if(func === trueFunc){
      if(next === rootFunc) return func;
      else return next;
    }

    return function(elem){
      var siblings = getSiblings(elem);
      if(!siblings) return;

      //pos counts the preceding siblings with the same name
      for(var pos = 0, i = 0, j = siblings.length; i < j; i++){
        if(siblings[i] === elem){
          if(func(pos)) return next(elem);
          return;
        }
        if(getName(siblings[i]) === getName(elem)) pos++;
      }
    };
  },
  "nth-last-of-type": function(next, rule){
    var func = getNCheck(rule);

    if(func === falseFunc) return func;
    if(func === trueFunc){
      if(next === rootFunc) return func;
      else return next;
    }

    return function(elem){
      var siblings = getSiblings(elem);
      if(!siblings) return;

      //pos counts the following siblings with the same name
      for(var pos = 0, i = siblings.length-1; i >= 0; i--){
        if(siblings[i] === elem){
          if(func(pos)) return next(elem);
          return;
        }
        if(getName(siblings[i]) === getName(elem)) pos++;
      }
    };
  },

  //forms
  //to consider: :target, :enabled
  selected: function(next){
    return function(elem){
      if(hasAttrib(elem, "selected")) return next(elem);
      //the first <option> in a <select> is also selected
      //TODO this only works for direct descendents
      //TODO ignores `multiple` and siblings that carry an explicit `selected`
      if(getName(elem) !== "option") return;
      var parent = getParent(elem);
      if(!parent || getName(parent) !== "select") return;
      if(getFirstElement(getChildren(parent)) === elem) return next(elem);
    };
  },
  disabled: function(next){
    return function(elem){
      if(hasAttrib(elem, "disabled")) return next(elem);
    };
  },
  enabled: function(next){
    return function(elem){
      if(!hasAttrib(elem, "disabled")) return next(elem);
    };
  },
  checked: function(next){
    return function(elem){
      if(hasAttrib(elem, "checked")) return next(elem);
    };
  },

  //jQuery extensions
  header: function(next){
    return function(elem){
      var name = getName(elem);
      if(
        name === "h1" ||
        name === "h2" ||
        name === "h3" ||
        name === "h4" ||
        name === "h5" ||
        name === "h6"
      ) return next(elem);
    };
  },
  button: function(next){
    return function(elem){
      if(
        getName(elem) === "button" ||
        getName(elem) === "input" &&
        hasAttrib(elem, "type") &&
        getAttributeValue(elem, "type") === "button"
      ) return next(elem);
    };
  },
  input: function(next){
    return function(elem){
      var name = getName(elem);
      if(
        name === "input" ||
        name === "textarea" ||
        name === "select" ||
        name === "button"
      ) return next(elem);
    };
  },
  //:text - an <input> without a type or with type="text"
  text: function(next){
    return function(elem){
      if(getName(elem) !== "input") return;
      if(
        !hasAttrib(elem, "type") ||
        getAttributeValue(elem, "type") === "text"
      ) return next(elem);
    };
  },
  //the remaining form filters only compare the type attribute
  checkbox: getAttribFunc("type", "checkbox"),
  file: getAttribFunc("type", "file"),
  password: getAttribFunc("type", "password"),
  radio: getAttribFunc("type", "radio"),
  reset: getAttribFunc("type", "reset"),
  image: getAttribFunc("type", "image"),
  submit: getAttribFunc("type", "submit")
};
//while filters are precompiled, pseudos get called when they are needed
//a pseudo is invoked as pseudos[name](elem, subselect) and its result is used as a boolean
//the table starts out empty; it is exposed as CSSselect.pseudos, presumably so that
//callers can register their own entries (TODO confirm)
var pseudos = {};
//helper methods
//returns the children of elem's parent (elem included),
//or the falsy parent value when elem has no parent
function getSiblings(elem){
  var parent = getParent(elem);
  if(!parent) return parent;
  return getChildren(parent);
}
/*
  finds the position of an element among its siblings

  only element siblings are counted; returns -1 when the element has no
  siblings list or cannot be found in it
*/
function getIndex(elem){
  var siblings = getSiblings(elem);
  if(!siblings) return -1;

  var position = 0;
  var total = siblings.length;
  for(var i = 0; i < total; i++){
    var sibling = siblings[i];
    if(sibling === elem) return position;
    if(isElement(sibling)) position += 1;
  }
  return -1;
}
//returns the first element node in a list of nodes,
//or undefined when the list is missing or holds no element
function getFirstElement(elems){
  if(!elems) return;

  var total = elems.length;
  var index = 0;
  while(index < total){
    var candidate = elems[index];
    if(isElement(candidate)) return candidate;
    index++;
  }
}
/*
  returns a function that checks if an elements index matches the given rule
  highly optimized to return the fastest solution

  `formula` is the argument of an :nth-* pseudo-class: "even", "odd" or an
  expression of the form an+b. The returned function takes the ZERO-based
  position of an element and returns a boolean. When the rule can never
  match, falseFunc itself is returned; when it always matches, trueFunc.
  Throws a SyntaxError when the formula cannot be parsed.
*/
var re_nthElement = /^([+\-]?\d*n)?\s*(?:([+\-]?)\s*(\d+))?$/;
function getNCheck(formula){
  var a, b;

  //parse the formula
  //b is lowered by 1 as the rule uses index 1 as the start
  formula = formula.trim().toLowerCase();
  if(formula === "even"){
    a = 2;
    b = -1;
  } else if(formula === "odd"){
    a = 2;
    b = 0;
  }
  else {
    formula = formula.match(re_nthElement);
    if(!formula){
      //TODO forward rule to error
      throw new SyntaxError("n-th rule couldn't be parsed");
    }
    if(formula[1]){
      a = parseInt(formula[1], 10);
      //"n", "+n" and "-n" carry no digits (NaN) and mean 1 / -1;
      //an explicit "0n" has to stay 0
      if(isNaN(a)){
        if(formula[1].charAt(0) === "-") a = -1;
        else a = 1;
      }
    } else a = 0;
    if(formula[3]) b = parseInt((formula[2] || "") + formula[3], 10) - 1;
    else b = -1;
  }

  //when b <= 0, a*n won't be possible for any matches when a < 0
  //besides, the specification says that no element is matched when a and b are 0
  if(b < 0 && a <= 0) return falseFunc;

  //when b <= 0 and a === 1, they match any element
  if(b < 0 && a === 1) return trueFunc;

  //when a is in the range -1..1, it matches any element (so only b is checked)
  if(a ===-1) return function(pos){ return pos <= b; };
  if(a === 1) return function(pos){ return pos >= b; };
  if(a === 0) return function(pos){ return pos === b; };

  //when a > 0, modulo can be used to check if there is a match
  if(a > 1) return function(pos){
    return pos >= 0 && (pos -= b) >= 0 && (pos % a) === 0;
  };

  //a < -1: the matches are b, b-|a|, b-2|a|, ... down to the first element
  a *= -1; //make a positive
  return function(pos){
    return pos >= 0 && pos <= b && ((b - pos) % a) === 0;
  };
}
//builds a filter (a function taking `next`) that passes elements
//whose attribute `name` equals `value`; the comparison is done by checkAttrib
function getAttribFunc(name, value){
  var attribFilter = function(next){
    return checkAttrib(next, name, value);
  };
  return attribFilter;
}
//returns a check that calls next(elem) when the element has the attribute
//`name` and its value is strictly equal to `value`; otherwise yields undefined
function checkAttrib(next, name, value){
  return function(elem){
    if(!hasAttrib(elem, name)) return;
    if(getAttributeValue(elem, name) !== value) return;
    return next(elem);
  };
}
//sentinel that marks the start of a function chain: parse() uses it as the initial
//`next`, and it is compared by identity to recognise a chain nothing was added to
function rootFunc(){
return true;
}
//sentinel for "matches every element"; compared by identity so that
//always-true checks can be dropped or short-circuited at compile time
function trueFunc(){
return true;
}
//sentinel for "matches no element"; compared by identity so that
//selectors which can never match are detected at compile time
function falseFunc(){
return false;
}
/*
  all available rules

  one compiler per token type: each takes the chain built so far (`next`)
  and, where needed, the token itself, and returns the new head of the chain
*/
var generalRules = {
  __proto__: null,

  //tags
  tag: function(next, data){
    var expected = data.name;
    return function(elem){
      if(getName(elem) === expected) return next(elem);
    };
  },

  //traversal
  //descendant: some ancestor has to satisfy the rest of the chain
  descendant: function(next){
    return function(elem){
      for(var ancestor = getParent(elem); ancestor; ancestor = getParent(ancestor)){
        if(next(ancestor)) return true;
      }
    };
  },
  //child: the direct parent has to satisfy the rest of the chain
  child: function(next){
    return function(elem){
      var parent = getParent(elem);
      return parent ? next(parent) : undefined;
    };
  },
  //sibling: some element sibling in front of elem has to satisfy the chain
  sibling: function(next){
    return function(elem){
      var siblings = getSiblings(elem);
      if(!siblings) return;
      var total = siblings.length;
      for(var i = 0; i < total; i++){
        var candidate = siblings[i];
        if(!isElement(candidate)) continue;
        if(candidate === elem) return;
        if(next(candidate)) return true;
      }
    };
  },
  //adjacent: the element sibling directly in front of elem has to satisfy the chain
  adjacent: function(next){
    return function(elem){
      var siblings = getSiblings(elem);
      if(!siblings) return;
      var previous;
      var total = siblings.length;
      for(var i = 0; i < total; i++){
        var candidate = siblings[i];
        if(!isElement(candidate)) continue;
        if(candidate === elem){
          if(previous) return next(previous);
          return;
        }
        previous = candidate;
      }
    };
  },
  //universal: adds no check of its own
  universal: function(next){
    return next === rootFunc ? trueFunc : next;
  },

  //attributes
  attribute: function(next, data){
    var rules = data.ignoreCase ? noCaseAttributeRules : attributeRules;
    return rules[data.action](next, data.name, data.value, data.ignoreCase);
  },

  //pseudos
  pseudo: function(next, data){
    var name = data.name;
    var subselect = data.data;

    if(name in filters) return filters[name](next, subselect);

    if(!(name in pseudos)){
      throw new SyntaxError("unmatched pseudo-class: " + name);
    }
    return function(elem){
      if(pseudos[name](elem, subselect)) return next(elem);
    };
  }
};
/*
attribute selectors
*/
//characters with a special meaning inside a regular expression (plus whitespace)
var reChars = /[-[\]{}()*+?.,\\^$|#\s]/g; //https://github.com/slevithan/XRegExp/blob/master/src/xregexp.js#L469
//prefixes every such character with a backslash so that `str` can be
//embedded literally into a RegExp source
function escapeRe(str){
  return str.replace(reChars, function(special){
    return "\\" + special;
  });
}
//creates an attribute rule that tests the attribute value against the regular
//expression pre + <escaped value> + post ("i" flag added when ignoreCase is set)
function wrapReRule(pre, post){
  return function(next, name, value, ignoreCase){
    var flags = ignoreCase ? "i" : "";
    var pattern = new RegExp(pre + escapeRe(value) + post, flags);

    return function(elem){
      if(!hasAttrib(elem, name)) return;
      if(pattern.test(getAttributeValue(elem, name))) return next(elem);
    };
  };
}
//attribute rules used when the selector asks for a case-insensitive comparison
//all value based rules are regular expressions built by wrapReRule,
//so every one of them also requires the attribute to be present
var noCaseAttributeRules = {
__proto__: null,
//the attribute only has to be present
exists: function(next, name){
return function(elem){
if(hasAttrib(elem, name)) return next(elem);
};
},
//value appears as a whole whitespace-separated word
element: wrapReRule("(?:^|\\s)", "(?:$|\\s)"),
//value is the whole attribute
equals: wrapReRule("^", "$"),
//attribute is exactly value, or starts with value followed by "-"
hyphen: wrapReRule("^", "(?:$|-)"),
//attribute starts with value
start: wrapReRule("^", ""),
//attribute ends with value
end: wrapReRule("", "$"),
//attribute contains value
any: wrapReRule("", ""),
//attribute is anything but exactly value (negative lookahead)
not: wrapReRule("^(?!^", "$)")
};
//case-sensitive attribute rules; plain string operations are used
//where possible, the remaining rules are shared with noCaseAttributeRules
var attributeRules = {
  __proto__: null,
  equals: checkAttrib,
  exists: noCaseAttributeRules.exists,
  hyphen: noCaseAttributeRules.hyphen,
  element: noCaseAttributeRules.element,
  //attribute starts with value
  start: function(next, name, value){
    var prefixLength = value.length;

    return function(elem){
      if(!hasAttrib(elem, name)) return;
      if(getAttributeValue(elem, name).slice(0, prefixLength) === value) return next(elem);
    };
  },
  //attribute ends with value
  end: function(next, name, value){
    var fromEnd = -value.length;

    return function(elem){
      if(!hasAttrib(elem, name)) return;
      if(getAttributeValue(elem, name).slice(fromEnd) === value) return next(elem);
    };
  },
  //attribute contains value
  any: function(next, name, value){
    return function(elem){
      if(!hasAttrib(elem, name)) return;
      if(getAttributeValue(elem, name).indexOf(value) !== -1) return next(elem);
    };
  },
  //attribute differs from value; a missing attribute counts as different,
  //except for an empty value, where a present, non-empty attribute is required
  not: function(next, name, value){
    if(value !== ""){
      return function(elem){
        if(hasAttrib(elem, name) && getAttributeValue(elem, name) === value) return;
        return next(elem);
      };
    }

    return function(elem){
      if(hasAttrib(elem, name) && getAttributeValue(elem, name) !== "") return next(elem);
    };
  }
};
/*
sort the parts of the passed selector,
as there is potential for optimization
*/
//maps a token type to its sort key; sortByProcedure orders the tokens between
//two traversals by ascending key and treats -1 as "keep in place"
var procedure = {
__proto__: null,
universal: 5, //should be last so that it can be ignored
tag: 3, //very quick test
attribute: 1, //can be faster than class
pseudo: 0, //can be pretty expensive (especially :has)
//everything else shouldn't be moved
descendant: -1,
child: -1,
sibling: -1,
adjacent: -1
};
//returns a new token list in which every run of tokens between two traversal
//tokens (procedure value -1) is sorted by ascending procedure value;
//the traversal tokens themselves keep their position
function sortByProcedure(arr){
  //TODO optimize, sort individual attribute selectors
  var byProcedure = function(a, b){
    return procedure[a.type] - procedure[b.type];
  };

  var sorted = [];
  var run = [];
  var total = arr.length;

  for(var i = 0; i < total; i++){
    var token = arr[i];
    if(procedure[token.type] !== -1){
      run.push(token);
      continue;
    }
    //a traversal closes the current run
    sorted = sorted.concat(run.sort(byProcedure));
    sorted.push(token);
    run = [];
  }

  return sorted.concat(run.sort(byProcedure));
}
//compiles a selector string into one function(elem) that returns true when
//the element matches any of the comma separated sub-selectors;
//returns falseFunc itself when no sub-selector can match
function parse(selector){
  var compiled = [];

  CSSwhat(selector).forEach(function(tokens){
    var sorted = sortByProcedure(tokens);
    var func = rootFunc;

    //stack one check per token on top of the chain built so far
    for(var i = 0, j = sorted.length; i < j; i++){
      func = generalRules[sorted[i].type](func, sorted[i]);
      if(func === falseFunc) break;
    }

    //empty and impossible sub-selectors contribute nothing
    if(func !== rootFunc && func !== falseFunc) compiled.push(func);
  });

  var num = compiled.length;

  if(num === 0) return falseFunc;
  if(num === 1) return compiled[0];
  if(compiled.indexOf(trueFunc) >= 0) return trueFunc;

  return function(elem){
    return compiled.some(function(check){
      return check(elem);
    });
  };
}
/*
  the exported interface
*/

//CSSselect(query)        -> the compiled query function
//CSSselect(query, elems) -> same as CSSselect.iterate(query, elems)
//`query` may be a selector string or an already compiled function
var CSSselect = function(query, elems){
  var compiled = typeof query === "function" ? query : parse(query);
  if(arguments.length === 1) return compiled;
  return CSSselect.iterate(compiled, elems);
};

CSSselect.parse = parse;
CSSselect.filters = filters;
CSSselect.pseudos = pseudos;

//returns all elements in `elems` (and below them) that match the query;
//anything that is not an array is replaced by its children first
CSSselect.iterate = function(query, elems){
  var compiled = typeof query === "function" ? query : parse(query);
  if(compiled === falseFunc) return [];
  var nodes = Array.isArray(elems) ? elems : getChildren(elems);
  return iterate(compiled, nodes);
};

//tests a single element against the query and returns the query's result
CSSselect.is = function(elem, query){
  var compiled = typeof query === "function" ? query : parse(query);
  return compiled(elem);
};
//collects, in document order, every element in `elems` and in their subtrees
//for which query(elem) is truthy; non-element nodes (and everything below
//them) are skipped
function iterate(query, elems){
  var result = [];

  //all matches are pushed onto one shared array instead of concatenating
  //a fresh array at every level of the recursion
  (function walk(nodes){
    for(var i = 0, j = nodes.length; i < j; i++){
      var node = nodes[i];
      if(!isElement(node)) continue;
      if(query(node)) result.push(node);

      var children = getChildren(node);
      if(children) walk(children);
    }
  })(elems);

  return result;
}
/*
export CSSselect
*/
//when a `module` object with an `exports` property exists, CSSselect becomes the module's export;
//otherwise it is registered with an AMD loader (if `define.amd` is present)
//and, in either of those cases, attached to `global` as CSSselect
if(typeof module !== "undefined" && "exports" in module){
module.exports = CSSselect;
} else {
if(typeof define === "function" && define.amd){
define("CSSselect", function(){
return CSSselect;
});
}
global.CSSselect = CSSselect;
}
})(
typeof window === "object" ? window : this,
typeof CSSwhat === "undefined" ? require("CSSwhat") : CSSwhat
);

View File

@@ -0,0 +1,5 @@
language: node_js
node_js:
- 0.4
- 0.6
- 0.7

View File

@@ -0,0 +1,11 @@
Copyright (c) Felix Böhm
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
THIS IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS,
EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@@ -0,0 +1,136 @@
;(function(global){ "use strict";
//regexps
//a name at the start of the input: escaped characters, word characters, "-" and U+00C0..U+FFFF
var re_name = /^(?:\\.|[\w\-\u00c0-\uFFFF])+/,
//whitespace around a ">", "~" or "+" and at the end of the input; the character in front
//of it is captured and must not be a backslash, so an escaped space survives
re_cleanSelector = /([^\\])\s*([>~+]|$)\s*/g,
//NOTE(review): not referenced by the code visible in this file
re_nthElement = /^([+\-]?\d*n)?\s*([+\-])?\s*(\d)?$/,
//a backslash followed by six digits or by any single character
re_escapedCss = /\\(\d{6}|.)/g,
//exactly one character that is not a digit
re_nonNumeric = /^\D$/,
//the inside of an attribute selector up to and including the closing "]":
//1 = name, 2 = operator prefix, 3 = quote, 4 = quoted value, 5 = unquoted value, 6 = "i" flag
re_attr = /^\s*((?:\\.|[\w\u00c0-\uFFFF\-])+)\s*(?:(\S?)=\s*(?:(['"])(.*?)\3|(#?(?:\\.|[\w\u00c0-\uFFFF\-])*)|)|)\s*(i)?\]/; //https://github.com/jquery/sizzle/blob/master/sizzle.js#L374
//maps the character in front of the "=" of an attribute selector to the name of the action;
//"undefined" is hit when the selector has no "=" at all, "" when it is a plain "="
var actionTypes = {
__proto__: null,
"undefined": "exists",
"": "equals",
"~": "element",
"^": "start",
"$": "end",
"*": "any",
"!": "not",
"|": "hyphen"
};
//single characters that become a token of the given type without any further data
var simpleSelectors = {
__proto__: null,
">": "child",
"~": "sibling",
"+": "adjacent",
"*": "universal"
};
//shorthands that expand to an attribute token: [attribute name, action];
//the name that follows the character becomes the token's value
var attribSelectors = {
__proto__: null,
"#": ["id", "equals"],
".": ["class", "element"]
};
//a backslash followed by six hexadecimal digits or by any single character
var re_hexEscape = /\\([\da-fA-F]{6}|.)/g;

/*
  resolves the backslash escapes in a CSS identifier or string

  * a backslash + six hex digits, or a backslash + one digit, is replaced by
    the character with that (hexadecimal) code point; zero, surrogates and
    values above U+10FFFF become U+FFFD
  * a backslash + any other single character yields that character
*/
function unescapeCSS(str){
  //based on http://mathiasbynens.be/notes/css-escapes
  //TODO support short sequences (/\\[\da-f]{1,5} /)
  return str.replace(re_hexEscape, function(m, s){
    //a single character that is not a digit just loses its backslash
    if(s.length === 1 && (s < "0" || s > "9")) return s;

    //CSS escapes are hexadecimal
    var code = parseInt(s, 16);

    if(code === 0 || code > 0x10FFFF || (code >= 0xD800 && code <= 0xDFFF)){
      return "\uFFFD";
    }
    if(code > 0xFFFF){
      //String.fromCharCode only takes UTF-16 units: build the surrogate pair
      code -= 0x10000;
      return String.fromCharCode(0xD800 + (code >> 10), 0xDC00 + (code & 0x3FF));
    }
    return String.fromCharCode(code);
  });
}
//`selector` is expected to start with "("; returns the index right behind the
//matching ")" (nested parentheses are counted), or the first index at or
//behind the end of the string when the parenthesis is never closed
function getClosingPos(selector){
  var depth = 1;
  var pos = 1;
  var len = selector.length;

  while(depth > 0 && pos < len){
    var ch = selector.charAt(pos);
    if(ch === "(") depth++;
    else if(ch === ")") depth--;
    pos++;
  }

  return pos;
}
/*
  parses a selector into a list of sub-selectors (one per comma separated
  part), each of which is a list of tokens:

  * {type: "tag", name}
  * {type: "descendant" | "child" | "sibling" | "adjacent" | "universal"}
  * {type: "attribute", name, action, value, ignoreCase}
  * {type: "pseudo", name, data}

  Whitespace around a comma only separates the sub-selectors and never
  produces a descendant token. Throws a SyntaxError when a name or an
  attribute selector is malformed and an Error for any other character
  that cannot be handled.
*/
function parse(selector){
  selector = (selector + "").trimLeft().replace(re_cleanSelector, "$1$2");

  var subselects = [],
      tokens = [],
      data, firstChar, name;

  //consumes the name at the start of the remaining selector
  function getName(){
    var match = selector.match(re_name);
    if(!match) throw new SyntaxError("Expected a name, found: " + selector);
    var sub = match[0];
    selector = selector.substr(sub.length);
    return unescapeCSS(sub);
  }

  while(selector !== ""){
    if(re_name.test(selector)){
      tokens.push({type: "tag", name: getName().toLowerCase()});
    } else if(/^\s/.test(selector)){
      selector = selector.trimLeft();
      //whitespace in front of a comma is not a combinator
      if(selector.charAt(0) !== ",") tokens.push({type: "descendant"});
    } else {
      firstChar = selector.charAt(0);
      selector = selector.substr(1);

      if(firstChar in simpleSelectors){
        tokens.push({type: simpleSelectors[firstChar]});
      } else if(firstChar in attribSelectors){
        tokens.push({
          type: "attribute",
          name: attribSelectors[firstChar][0],
          action: attribSelectors[firstChar][1],
          value: getName(),
          ignoreCase: false
        });
      } else if(firstChar === "["){
        data = selector.match(re_attr);
        if(!data) throw new SyntaxError("Malformed attribute selector: [" + selector);
        selector = selector.substr(data[0].length);
        tokens.push({
          type: "attribute",
          name: unescapeCSS(data[1]),
          action: actionTypes[data[2]],
          value: unescapeCSS(data[4] || data[5] || ""),
          ignoreCase: !!data[6]
        });
      } else if(firstChar === ":"){
        //if(selector.charAt(0) === ":"){} //TODO pseudo-element
        name = getName();
        data = "";
        if(selector.charAt(0) === "("){
          //everything between the parentheses is handed on unparsed
          var pos = getClosingPos(selector);
          data = selector.substr(1, pos - 2);
          selector = selector.substr(pos);
        }
        tokens.push({type: "pseudo", name: name, data: data});
      } else if(firstChar === ","){
        subselects.push(tokens);
        tokens = [];
        //whitespace behind a comma is not a combinator either
        selector = selector.trimLeft();
      } else {
        //otherwise, the parser needs to throw or it would enter an infinite loop
        throw new Error("Unmatched selector:" + firstChar + selector);
      }
    }
  }

  subselects.push(tokens);
  return subselects;
}
//when a `module` object with an `exports` property exists, the parser becomes the module's export;
//otherwise it is registered with an AMD loader (if `define.amd` is present)
//and, in either of those cases, attached to `global` as CSSwhat
if(typeof module !== "undefined" && "exports" in module){
module.exports = parse;
} else {
if(typeof define === "function" && define.amd){
define("CSSwhat", function(){
return parse;
});
}
global.CSSwhat = parse;
}
})(typeof window === "object" ? window : this);

View File

@@ -0,0 +1,32 @@
{
"author": {
"name": "Felix Böhm",
"email": "me@feedic.com",
"url": "http://feedic.com"
},
"name": "CSSwhat",
"description": "a CSS selector parser",
"version": "0.1.1",
"repository": {
"url": "https://github.com/FB55/CSSwhat"
},
"main": "./index.js",
"scripts": {
"test": "node tests/test.js"
},
"dependencies": {},
"devDependencies": {},
"optionalDependencies": {},
"engines": {
"node": "*"
},
"license": "BSD-like",
"readme": "#CSSwhat [![Build Status](https://secure.travis-ci.org/FB55/CSSwhat.png?branch=master)](http://travis-ci.org/FB55/CSSwhat)\n\na CSS selector parser\n\n__// TODO__",
"readmeFilename": "readme.md",
"_id": "CSSwhat@0.1.1",
"dist": {
"shasum": "489865be1fe831c4a9f5be82cb0ea2843605d718"
},
"_from": "CSSwhat@>= 0.1",
"_resolved": "https://registry.npmjs.org/CSSwhat/-/CSSwhat-0.1.1.tgz"
}

View File

@@ -0,0 +1,5 @@
#CSSwhat [![Build Status](https://secure.travis-ci.org/FB55/CSSwhat.png?branch=master)](http://travis-ci.org/FB55/CSSwhat)
a CSS selector parser
__// TODO__

View File

@@ -0,0 +1,42 @@
var deepEquals = require("assert").deepEqual,
CSSwhat = require("../");
//every entry is [selector, expected result of parsing it, description of the test]
var tests = [
["div", [ [ { type: 'tag', name: 'div' } ] ], "simple tag"],
["*", [ [ { type: 'universal' } ] ], "universal"],
//traversal
["div div", [ [ { type: 'tag', name: 'div' },
{ type: 'descendant' },
{ type: 'tag', name: 'div' } ] ], "descendant"],
["div\t \n \tdiv", [ [ { type: 'tag', name: 'div' },
{ type: 'descendant' },
{ type: 'tag', name: 'div' } ] ], "descendant /w whitespace"],
["div + div", [ [ { type: 'tag', name: 'div' },
{ type: 'adjacent' },
{ type: 'tag', name: 'div' } ] ], "adjacent"],
["div ~ div", [ [ { type: 'tag', name: 'div' },
{ type: 'sibling' },
{ type: 'tag', name: 'div' } ] ], "sibling"],
//Escaped whitespace
["#\\ > a ", [ [ { type: 'attribute', action: 'equals', name: 'id', value: ' ', ignoreCase: false }, { type: 'child' }, { type: 'tag', name: 'a' } ] ], "Space between escaped space and combinator" ],
[".\\ ", [ [ { type: 'attribute', name: 'class', action: 'element', value: ' ', ignoreCase: false } ] ], "Space after escaped space" ],
//attributes
["[name^='foo[']",[[{"type":"attribute","name":"name","action":"start","value":"foo[","ignoreCase":false}]],"escaped attribute"],
["[name^='foo[bar]']",[[{"type":"attribute","name":"name","action":"start","value":"foo[bar]","ignoreCase":false}]],"escaped attribute"],
["[name$='[bar]']",[[{"type":"attribute","name":"name","action":"end","value":"[bar]","ignoreCase":false}]],"escaped attribute"],
["[href *= 'google']",[[{"type":"attribute","name":"href","action":"any","value":"google","ignoreCase":false}]],"escaped attribute"],
["[name=foo\\.baz]",[[{"type":"attribute","name":"name","action":"equals","value":"foo.baz","ignoreCase":false}]],"escaped attribute"],
["[name=foo\\[bar\\]]",[[{"type":"attribute","name":"name","action":"equals","value":"foo[bar]","ignoreCase":false}]],"escaped attribute"],
["[xml\\:test]",[[{"type":"attribute","name":"xml:test","action":"exists","value":"","ignoreCase":false}]],"escaped attribute"]
//TODO
];
//parses the selector of every test case in place, so that the entry reads
//[actual, expected, message] - exactly the argument list of deepEqual
tests.forEach(function(testCase){
  var description = testCase[2];
  testCase[0] = CSSwhat(testCase[0]);
  deepEquals.apply(null, testCase);
  console.log(description, "passed");
});

View File

@@ -0,0 +1,40 @@
{
"name": "CSSselect",
"version": "0.3.1",
"description": "a rtl CSS selector engine",
"author": {
"name": "Felix Boehm",
"email": "me@feedic.com"
},
"keywords": [
"css",
"selector"
],
"main": "index.js",
"engine": "",
"repository": {
"type": "git",
"url": "git://github.com/fb55/cssselect.git"
},
"dependencies": {
"CSSwhat": ">= 0.1"
},
"devDependencies": {
"htmlparser2": ">= 2.2.8",
"cheerio-soupselect": "*",
"mocha": "*",
"expect.js": "*"
},
"scripts": {
"test": "mocha -u exports -R list tests/qwery tests/nwmatcher/scotch.js"
},
"license": "BSD-like",
"readme": "#CSSselect [![Build Status](https://secure.travis-ci.org/fb55/CSSselect.png?branch=master)](http://travis-ci.org/fb55/CSSselect)\n\n##What?\n\nCSSselect is CSS selector engine. It returns a function that tests elements if they match a selector - checking needs to happen \"from the top\", like browser engines execute queries.\n\n##Why?\n\nJust take the following CSS query: `foo bar baz`. When the element named `baz` has like a billion children, every one of them needs to be checked if they match a query. Three times, to be precise, if you run a CSS query from the start to the end (as e.g. JSDOM does). Yup, that's slow.\n\nThis library checks every element once. The more complex the query, the greater the benefit.\n\n##How?\n\nBy stacking functions!\n\n##TODO\n\n1. The API needs to be improved\n2. Documentation needs to be written",
"readmeFilename": "README.md",
"_id": "CSSselect@0.3.1",
"dist": {
"shasum": "ad91c2821658320c5047ba899201a236922c42f9"
},
"_from": "CSSselect@0.x",
"_resolved": "https://registry.npmjs.org/CSSselect/-/CSSselect-0.3.1.tgz"
}

View File

@@ -0,0 +1,10 @@
// Benchmark script: uses `ben` to time calling CSSselect on a selector string
// (logged as "Parsing"), then to time CSSselect.iterate with the result of
// parse(testString) against the helper's default DOM (logged as "Executing").
var CSSselect = require("../");
var ben = require("ben");
var testString = "doo, *#foo > elem.bar[class$=bAz i]:not([ id *= \"2\" ])";
var helper = require("./helper.js");
var parse = require("../"); // resolves to the same module as CSSselect
var dom = helper.getDefaultDom();

// Benchmark body for the first measurement; reads testString while it is
// still the raw selector string.
function parseOnce(){
	CSSselect(testString);
}

// Benchmark body for the second measurement; by the time it runs testString
// has been replaced with the value returned by parse().
function executeOnce(){
	CSSselect.iterate(testString, dom);
}

var parseTime = ben(1e5, parseOnce);
console.log("Parsing took:", parseTime);

testString = parse(testString);

var executeTime = ben(1e6, executeOnce);
console.log("Executing took:", executeTime * 1e3);

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,27 @@
var htmlparser2 = require("htmlparser2"),
Parser = htmlparser2.Parser,
Handler = htmlparser2.DomHandler,
CSSselect = require("../");
module.exports = {
CSSselect: CSSselect,
getFile: function(name){
return module.exports.getDOM(
require("fs").readFileSync(__dirname + "/docs/" + name).toString()
);
},
getDOM: function(data){
var h = new Handler({refParent: true, ignoreWhitespace: true}),
p = new Parser(h);
p.write(data);
p.end();
return h.dom;
},
getDefaultDom: function(){
return module.exports.getDOM(
"<elem id=foo><elem class='bar baz'><tag class='boom'> This is some simple text </tag></elem></elem>"
);
}
};

View File

@@ -0,0 +1 @@
<html><body>You are being <a href="https://raw.github.com/dperini/nwmatcher/master/LICENSE">redirected</a>.</body></html>

View File

@@ -0,0 +1,359 @@
/*
taken from https://github.com/dperini/nwmatcher/blob/master/test/scotch/test.js
*/
"use strict";
var expect = require("expect.js"),
DomUtils = require("htmlparser2").DomUtils,
helper = require("../helper.js"),
document = helper.getDOM(require("fs").readFileSync(__dirname + "/test.html")+""),
CSSselect = helper.CSSselect;
//Stand-in for Prototype's `$` function, bound to the test document.
//With exactly one argument: a string is looked up by id via DomUtils, and any
//other value is returned untouched. With any other number of arguments an
//array is returned in which each argument has been resolved the same way.
function getById(element){
	if(arguments.length !== 1){
		var resolved = [];
		for(var i = 0; i < arguments.length; i++){
			resolved.push(getById(arguments[i]));
		}
		return resolved;
	}
	return typeof element === "string" ? DomUtils.getElementById(element, document) : element;
}
/**
 * Asserts that `a` and `b` are equivalent according to expect.js `eql`.
 *
 * @param {*} a - actual value
 * @param {*} b - expected value
 * @param {string} [msg] - optional label; when the assertion fails it is
 *   prefixed to the error message so the failing check can be identified
 * @throws the error raised by expect.js when the values are not equivalent
 */
function assertEquivalent(a, b, msg){
	try {
		expect(a).to.be.eql(b);
	} catch(err){
		//The label is not passed to expect.js, so attach it to the failure here.
		if(msg && err instanceof Error) err.message = msg + ": " + err.message;
		throw err;
	}
}
/**
 * Asserts that `a` and `b` are the same value according to expect.js `be`.
 *
 * @param {*} a - actual value
 * @param {*} b - expected value
 * @param {string} [msg] - optional label; when the assertion fails it is
 *   prefixed to the error message so the failing check can be identified
 * @throws the error raised by expect.js when the values differ
 */
function assertEqual(a, b, msg){
	try {
		expect(a).to.be(b);
	} catch(err){
		//The label is not passed to expect.js, so attach it to the failure here.
		if(msg && err instanceof Error) err.message = msg + ": " + err.message;
		throw err;
	}
}
/**
 * Asserts that `a` is truthy according to expect.js `ok`.
 *
 * @param {*} a - value expected to be truthy
 * @param {string} [msg] - optional label; when the assertion fails it is
 *   prefixed to the error message so the failing check can be identified
 * @throws the error raised by expect.js when `a` is not truthy
 */
function assert(a, msg){
	try {
		expect(a).to.be.ok();
	} catch(err){
		//The label is not passed to expect.js, so attach it to the failure here.
		if(msg && err instanceof Error) err.message = msg + ": " + err.message;
		throw err;
	}
}
/**
 * Asserts that `a` is NOT truthy according to expect.js `not.be.ok`.
 *
 * @param {*} a - value expected to be falsy
 * @param {string} [msg] - optional label; when the assertion fails it is
 *   prefixed to the error message so the failing check can be identified
 * @throws the error raised by expect.js when `a` is truthy
 */
function refute(a, msg){
	try {
		expect(a).to.not.be.ok();
	} catch(err){
		//The label is not passed to expect.js, so attach it to the failure here.
		if(msg && err instanceof Error) err.message = msg + ": " + err.message;
		throw err;
	}
}
//Stand-ins for the NWMatcher methods used by the tests, backed by CSSselect.
//`match` is CSSselect.is.
var match = CSSselect.is;
//`select(query, doc)` runs CSSselect.iterate. The context defaults to the
//test document when `doc` is missing or undefined; a string `doc` is first
//resolved by running it through select() itself.
var select = function(query, doc){
	var context;
	if(typeof doc === "string") context = select(doc);
	else if(typeof doc === "undefined") context = document;
	else context = doc;
	return CSSselect.iterate(query, context);
};
//The tests...
module.exports = {
"Basic Selectors": {
/*
"*": function(){
//Universal selector
var results = DomUtils.getElementsByTagName("*", document);
assertEquivalent(select("*"), results, "Comment nodes should be ignored.");
},
*/
"E": function(){
//Type selector
var results = [], index = 0, nodes = DomUtils.getElementsByTagName("li", document);
while((results[index] = nodes[index++])){}
results.length--;
assertEquivalent(select("li"), results);
assertEqual(select("strong", getById("fixtures"))[0], getById("strong"));
assertEquivalent(select("nonexistent"), []);
},
"#id": function(){
//ID selector
assertEqual(select("#fixtures")[0], getById("fixtures"));
assertEquivalent(select("nonexistent"), []);
assertEqual(select("#troubleForm")[0], getById("troubleForm"));
},
".class": function(){
//Class selector
assertEquivalent(select(".first"), getById('p', 'link_1', 'item_1'));
assertEquivalent(select(".second"), []);
},
"E#id": function(){
assertEqual(select("strong#strong")[0], getById("strong"));
assertEquivalent(select("p#strong"), []);
},
"E.class": function(){
var secondLink = getById("link_2");
assertEquivalent(select('a.internal'), getById('link_1', 'link_2'));
assertEqual(select('a.internal.highlight')[0], secondLink);
assertEqual(select('a.highlight.internal')[0], secondLink);
assertEquivalent(select('a.highlight.internal.nonexistent'), []);
},
"#id.class": function(){
var secondLink = getById('link_2');
assertEqual(select('#link_2.internal')[0], secondLink);
assertEqual(select('.internal#link_2')[0], secondLink);
assertEqual(select('#link_2.internal.highlight')[0], secondLink);
assertEquivalent(select('#link_2.internal.nonexistent'), []);
},
"E#id.class": function(){
var secondLink = getById('link_2');
assertEqual(select('a#link_2.internal')[0], secondLink);
assertEqual(select('a.internal#link_2')[0], secondLink);
assertEqual(select('li#item_1.first')[0], getById("item_1"));
assertEquivalent(select('li#item_1.nonexistent'), []);
assertEquivalent(select('li#item_1.first.nonexistent'), []);
}
},
//Attribute selectors: presence ([foo]) and the value operators =, ~=, ^=, $=
//and *=, with and without a type selector in front. Several upstream cases
//(namespaced attributes, |=, SYNTAX_ERR expectations) are kept below but
//commented out, so they do not run.
"Attribute Selectors": {
"[foo]": function(){
//A bare attribute selector must behave like the same selector with an
//explicit `a` or `*` in front of it.
var body = DomUtils.getElementsByTagName("body", document, true, 1)[0];
assertEquivalent(select('[href]', body), select('a[href]', body));
assertEquivalent(select('[class~=internal]'), select('a[class~="internal"]'));
assertEquivalent(select('[id]'), select('*[id]'));
assertEquivalent(select('[type=radio]'), getById('checked_radio', 'unchecked_radio'));
assertEquivalent(select('[type=checkbox]'), select('*[type=checkbox]'));
assertEquivalent(select('[title]'), getById('with_title', 'commaParent'));
assertEquivalent(select('#troubleForm [type=radio]'), select('#troubleForm *[type=radio]'));
assertEquivalent(select('#troubleForm [type]'), select('#troubleForm *[type]'));
},
"E[foo]": function(){
assertEquivalent(select('h1[class]'), select('#fixtures h1'), "h1[class]");
//Disabled upstream case: upper-case attribute name
//assertEquivalent(select('h1[CLASS]'), select('#fixtures h1'), "h1[CLASS]");
assertEqual(select('li#item_3[class]')[0], getById('item_3'), "li#item_3[class]");
assertEquivalent(select('#troubleForm2 input[name="brackets[5][]"]'), getById('chk_1', 'chk_2'));
//Brackets in attribute value
assertEqual(select('#troubleForm2 input[name="brackets[5][]"]:checked')[0], getById('chk_1'));
//Space in attribute value
assertEqual(select('cite[title="hello world!"]')[0], getById('with_title'));
/*
//Namespaced attributes
assertEquivalent(select('[xml:lang]'), [document, getById("item_3")]);
assertEquivalent(select('*[xml:lang]'), [document, getById("item_3")]);
*/
},
'E[foo="bar"]': function(){
//Exact value match
assertEquivalent(select('a[href="#"]'), getById('link_1', 'link_2', 'link_3'));
/*this.assertThrowsException(/SYNTAX_ERR/, function(){
select('a[href=#]');
});*/
assertEqual(select('#troubleForm2 input[name="brackets[5][]"][value="2"]')[0], getById('chk_2'));
},
'E[foo~="bar"]': function(){
//Whitespace-separated word match, quoted and unquoted
assertEquivalent(select('a[class~="internal"]'), getById('link_1', 'link_2'), "a[class~=\"internal\"]");
assertEquivalent(select('a[class~=internal]'), getById('link_1', 'link_2'), "a[class~=internal]");
assertEqual(select('a[class~=external][href="#"]')[0], getById('link_3'), 'a[class~=external][href="#"]');
},
/*
'E[foo|="en"]': function(){
assertEqual(select('*[xml:lang|="es"]')[0], getById('item_3'));
assertEqual(select('*[xml:lang|="ES"]')[0], getById('item_3'));
},
*/
'E[foo^="bar"]': function(){
//Prefix match
assertEquivalent(select('div[class^=bro]'), getById('father', 'uncle'), 'matching beginning of string');
assertEquivalent(select('#level1 *[id^="level2_"]'), getById('level2_1', 'level2_2', 'level2_3'));
assertEquivalent(select('#level1 *[id^=level2_]'), getById('level2_1', 'level2_2', 'level2_3'));
},
'E[foo$="bar"]': function(){
//Suffix match
assertEquivalent(select('div[class$=men]'), getById('father', 'uncle'), 'matching end of string');
assertEquivalent(select('#level1 *[id$="_1"]'), getById('level2_1', 'level3_1'));
assertEquivalent(select('#level1 *[id$=_1]'), getById('level2_1', 'level3_1'));
},
'E[foo*="bar"]': function(){
//Substring match
assertEquivalent(select('div[class*="ers m"]'), getById('father', 'uncle'), 'matching substring');
assertEquivalent(select('#level1 *[id*="2"]'), getById('level2_1', 'level3_2', 'level2_2', 'level2_3'));
/*this.assertThrowsException(/SYNTAX_ERR/, function(){
select('#level1 *[id*=2]');
});*/
}
// *** these should throw SYNTAX_ERR ***
/*'E[id=-1]': function(){
this.assertThrowsException(/SYNTAX_ERR/, function(){
select('#level1 *[id=-1]');
});
},
'E[class=-45deg]': function(){
this.assertThrowsException(/SYNTAX_ERR/, function(){
select('#level1 *[class=-45deg]');
});
},
'E[class=8mm]': function(){
this.assertThrowsException(/SYNTAX_ERR/, function(){
select('#level1 *[class=8mm]');
});
}*/
},
//Structural pseudo-classes (:first-child, :last-child, :nth-*, :only-child),
//mostly exercised on the #level1, #p and #list fixtures. The :empty test at
//the end is commented out and does not run.
"Structural pseudo-classes": {
"E:first-child": function(){
assertEqual(select('#level1>*:first-child')[0], getById('level2_1'));
assertEquivalent(select('#level1 *:first-child'), getById('level2_1', 'level3_1', 'level_only_child'));
assertEquivalent(select('#level1>div:first-child'), []);
assertEquivalent(select('#level1 span:first-child'), getById('level2_1', 'level3_1'));
assertEquivalent(select('#level1:first-child'), []);
},
"E:last-child": function(){
assertEqual(select('#level1>*:last-child')[0], getById('level2_3'));
assertEquivalent(select('#level1 *:last-child'), getById('level3_2', 'level_only_child', 'level2_3'));
assertEqual(select('#level1>div:last-child')[0], getById('level2_3'));
assertEqual(select('#level1 div:last-child')[0], getById('level2_3'));
assertEquivalent(select('#level1>span:last-child'), []);
},
"E:nth-child(n)": function(){
//Plain index plus the n+2 and -n+2 formulas
assertEqual(select('#p *:nth-child(3)')[0], getById('link_2'));
assertEqual(select('#p a:nth-child(3)')[0], getById('link_2'), 'nth-child');
assertEquivalent(select('#list > li:nth-child(n+2)'), getById('item_2', 'item_3'));
assertEquivalent(select('#list > li:nth-child(-n+2)'), getById('item_1', 'item_2'));
},
"E:nth-of-type(n)": function(){
assertEqual(select('#p a:nth-of-type(2)')[0], getById('link_2'), 'nth-of-type');
assertEqual(select('#p a:nth-of-type(1)')[0], getById('link_1'), 'nth-of-type');
},
"E:nth-last-of-type(n)": function(){
assertEqual(select('#p a:nth-last-of-type(1)')[0], getById('link_2'), 'nth-last-of-type');
},
"E:first-of-type": function(){
assertEqual(select('#p a:first-of-type')[0], getById('link_1'), 'first-of-type');
},
"E:last-of-type": function(){
assertEqual(select('#p a:last-of-type')[0], getById('link_2'), 'last-of-type');
},
"E:only-child": function(){
assertEqual(select('#level1 *:only-child')[0], getById('level_only_child'));
//Shouldn't return anything
assertEquivalent(select('#level1>*:only-child'), []);
assertEquivalent(select('#level1:only-child'), []);
assertEquivalent(select('#level2_2 :only-child:not(:last-child)'), []);
assertEquivalent(select('#level2_2 :only-child:not(:first-child)'), []);
}/*,
"E:empty": function(){
getById('level3_1').children = [];
assertEquivalent(select('#level1 *:empty'), getById('level3_1', 'level3_2', 'level2_3'), '#level1 *:empty');
assertEquivalent(select('#level_only_child:empty'), [], 'newlines count as content!');
//Shouldn't return anything
assertEquivalent(select('span:empty > *'), []);
}*/
},
//:not() wrapping attribute selectors, ids, classes and structural
//pseudo-classes, including chained :not() clauses and :not() followed by
//child/descendant combinators.
"E:not(s)": function(){
//Negation pseudo-class
assertEquivalent(select('a:not([href="#"])'), []);
assertEquivalent(select('div.brothers:not(.brothers)'), []);
assertEquivalent(select('a[class~=external]:not([href="#"])'), [], 'a[class~=external][href!="#"]');
assertEqual(select('#p a:not(:first-of-type)')[0], getById('link_2'), 'first-of-type');
assertEqual(select('#p a:not(:last-of-type)')[0], getById('link_1'), 'last-of-type');
assertEqual(select('#p a:not(:nth-of-type(1))')[0], getById('link_2'), 'nth-of-type');
assertEqual(select('#p a:not(:nth-last-of-type(1))')[0], getById('link_1'), 'nth-last-of-type');
assertEqual(select('#p a:not([rel~=nofollow])')[0], getById('link_2'), 'attribute 1');
assertEqual(select('#p a:not([rel^=external])')[0], getById('link_2'), 'attribute 2');
assertEqual(select('#p a:not([rel$=nofollow])')[0], getById('link_2'), 'attribute 3');
assertEqual(select('#p a:not([rel$="nofollow"]) > em')[0], getById('em'), 'attribute 4');
assertEqual(select('#list li:not(#item_1):not(#item_3)')[0], getById('item_2'), 'adjacent :not clauses');
assertEqual(select('#grandfather > div:not(#uncle) #son')[0], getById('son'));
assertEqual(select('#p a:not([rel$="nofollow"]) em')[0], getById('em'), 'attribute 4 + all descendants');
assertEqual(select('#p a:not([rel$="nofollow"])>em')[0], getById('em'), 'attribute 4 (without whitespace)');
},
//:disabled and :checked, checked against the inputs of #troubleForm.
"UI element states pseudo-classes": {
"E:disabled": function(){
assertEqual(select('#troubleForm > p > *:disabled')[0], getById('disabled_text_field'));
},
"E:checked": function(){
assertEquivalent(select('#troubleForm *:checked'), getById('checked_box', 'checked_radio'));
}
},
//The four combinators: descendant (space), adjacent sibling (+), child (>)
//and general sibling (~), written both with and without surrounding
//whitespace.
"Combinators": {
"E F": function(){
//Descendant
assertEquivalent(select('#fixtures a *'), getById('em2', 'em', 'span'));
assertEqual(select('div#fixtures p')[0], getById("p"));
},
"E + F": function(){
//Adjacent sibling
assertEqual(select('div.brothers + div.brothers')[0], getById("uncle"));
assertEqual(select('div.brothers + div')[0], getById('uncle'));
assertEqual(select('#level2_1+span')[0], getById('level2_2'));
assertEqual(select('#level2_1 + span')[0], getById('level2_2'));
assertEqual(select('#level2_1 + *')[0], getById('level2_2'));
assertEquivalent(select('#level2_2 + span'), []);
//In test.html a comment node sits between #level3_1 and #level3_2
assertEqual(select('#level3_1 + span')[0], getById('level3_2'));
assertEqual(select('#level3_1 + *')[0], getById('level3_2'));
assertEquivalent(select('#level3_2 + *'), []);
assertEquivalent(select('#level3_1 + em'), []);
},
"E > F": function(){
//Child
assertEquivalent(select('p.first > a'), getById('link_1', 'link_2'));
assertEquivalent(select('div#grandfather > div'), getById('father', 'uncle'));
assertEquivalent(select('#level1>span'), getById('level2_1', 'level2_2'));
assertEquivalent(select('#level1 > span'), getById('level2_1', 'level2_2'));
assertEquivalent(select('#level2_1 > *'), getById('level3_1', 'level3_2'));
assertEquivalent(select('div > #nonexistent'), []);
},
"E ~ F": function(){
//General sibling
assertEqual(select('h1 ~ ul')[0], getById('list'));
assertEquivalent(select('#level2_2 ~ span'), []);
assertEquivalent(select('#level3_2 ~ *'), []);
assertEquivalent(select('#level3_1 ~ em'), []);
assertEquivalent(select('div ~ #level3_2'), []);
assertEquivalent(select('div ~ #level2_3'), []);
assertEqual(select('#level2_1 ~ span')[0], getById('level2_2'));
assertEquivalent(select('#level2_1 ~ *'), getById('level2_2', 'level2_3'));
assertEqual(select('#level3_1 ~ #level3_2')[0], getById('level3_2'));
assertEqual(select('span ~ #level3_2')[0], getById('level3_2'));
}
},
//Single-element matching through `match` (CSSselect.is), called as
//match(element, selector). Most checks use #dupL1; the final ones use the
//two links inside #p.
"NW.Dom.match": function(){
var element = getById('dupL1');
//Assertions
assert(match(element, 'span'));
assert(match(element, "span#dupL1"));
assert(match(element, "div > span"), "child combinator");
assert(match(element, "#dupContainer span"), "descendant combinator");
assert(match(element, "#dupL1"), "ID only");
assert(match(element, "span.span_foo"), "class name 1");
assert(match(element, "span.span_bar"), "class name 2");
assert(match(element, "span:first-child"), "first-child pseudoclass");
//Refutations
refute(match(element, "span.span_wtf"), "bogus class name");
refute(match(element, "#dupL2"), "different ID");
refute(match(element, "div"), "different tag name");
refute(match(element, "span span"), "different ancestry");
refute(match(element, "span > span"), "different parent");
refute(match(element, "span:nth-child(5)"), "different pseudoclass");
//Misc.
//Unquoted, double-quoted and single-quoted attribute values
refute(match(getById('link_2'), 'a[rel^=external]'));
assert(match(getById('link_1'), 'a[rel^=external]'));
assert(match(getById('link_1'), 'a[rel^="external"]'));
assert(match(getById('link_1'), "a[rel^='external']"));
},
//Pairs of selectors written differently that must select the same elements.
//Each assertion compares the results of two select() calls rather than a
//fixed list of fixture ids.
"Equivalent Selectors": function(){
assertEquivalent(select('div.brothers'), select('div[class~=brothers]'));
assertEquivalent(select('div.brothers'), select('div[class~=brothers].brothers'));
assertEquivalent(select('div:not(.brothers)'), select('div:not([class~=brothers])'));
assertEquivalent(select('li ~ li'), select('li:not(:first-child)'));
assertEquivalent(select('ul > li'), select('ul > li:nth-child(n)'));
assertEquivalent(select('ul > li:nth-child(even)'), select('ul > li:nth-child(2n)'));
assertEquivalent(select('ul > li:nth-child(odd)'), select('ul > li:nth-child(2n+1)'));
assertEquivalent(select('ul > li:first-child'), select('ul > li:nth-child(1)'));
assertEquivalent(select('ul > li:last-child'), select('ul > li:nth-last-child(1)'));
/* Opera 10 does not accept values > 128 as a parameter to :nth-child
See <http://operawiki.info/ArtificialLimits> */
assertEquivalent(select('ul > li:nth-child(n-128)'), select('ul > li'));
//Whitespace around the child combinator must not matter
assertEquivalent(select('ul>li'), select('ul > li'));
assertEquivalent(select('#p a:not([rel$="nofollow"])>em'), select('#p a:not([rel$="nofollow"]) > em'));
},
//Comma-separated selector groups. The two active assertions use attribute
//values that themselves contain commas; the first two upstream assertions are
//commented out and do not run.
"Multiple Selectors": function(){
//The next two assertions should return document-ordered lists of matching elements --Diego Perini
//assertEquivalent(select('#list, .first,*[xml:lang="es-us"] , #troubleForm'), getById('p', 'link_1', 'list', 'item_1', 'item_3', 'troubleForm'));
//assertEquivalent(select('#list, .first, *[xml:lang="es-us"], #troubleForm'), getById('p', 'link_1', 'list', 'item_1', 'item_3', 'troubleForm'));
//NOTE(review): the two assertions below are identical as they appear here
assertEquivalent(select('form[title*="commas,"], input[value="#commaOne,#commaTwo"]'), getById('commaParent', 'commaChild'));
assertEquivalent(select('form[title*="commas,"], input[value="#commaOne,#commaTwo"]'), getById('commaParent', 'commaChild'));
}
};

View File

@@ -0,0 +1,92 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>NWMatcher Tests</title>
<link rel="stylesheet" type="text/css" href="assets/style.css" media="screen" />
<script type="text/javascript" src="../../src/nwmatcher.js"></script>
<script type="text/javascript" src="scotch.js"></script>
<script type="text/javascript" src="test.js"></script>
</head>
<body>
<div id="container">
<div id="testlog" class="log"></div>
<!-- Test elements -->
<div id="fixtures" style="display: none;">
<h1 class="title">Some title <span>here</span></h1>
<p id="p" class="first summary">
<strong id="strong">This</strong> is a short blurb
<a id="link_1" class="first internal" rel="external nofollow" href="#">with a <em id="em2">link</em></a> or
<a id="link_2" class="internal highlight" href="#"><em id="em">two</em></a>.
Or <cite id="with_title" title="hello world!">a citation</cite>.
</p>
<ul id="list">
<li id="item_1" class="first"><a id="link_3" href="#" class="external"><span id="span">Another link</span></a></li>
<li id="item_2">Some text</li>
<li id="item_3" xml:lang="es-us" class="">Otra cosa</li>
</ul>
<!-- This form has a field with the name "id"; its "ID" property won't be "troubleForm" -->
<form id="troubleForm" action="">
<p>
<input type="hidden" name="id" id="hidden" />
<input type="text" name="disabled_text_field" id="disabled_text_field" disabled="disabled" />
<input type="text" name="enabled_text_field" id="enabled_text_field" />
<input type="checkbox" name="checkboxes" id="checked_box" checked="checked" value="Checked" />
<input type="checkbox" name="checkboxes" id="unchecked_box" value="Unchecked"/>
<input type="radio" name="radiobuttons" id="checked_radio" checked="checked" value="Checked" />
<input type="radio" name="radiobuttons" id="unchecked_radio" value="Unchecked" />
</p>
</form>
<form id="troubleForm2" action="">
<p>
<input type="checkbox" name="brackets[5][]" id="chk_1" checked="checked" value="1" />
<input type="checkbox" name="brackets[5][]" id="chk_2" value="2" />
</p>
</form>
<div id="level1">
<span id="level2_1">
<span id="level3_1"></span>
<!-- This comment should be ignored by the adjacent selector -->
<span id="level3_2"></span>
</span>
<span id="level2_2">
<em id="level_only_child">
</em>
</span>
<div id="level2_3"></div>
</div> <!-- #level1 -->
<div id="dupContainer">
<span id="dupL1" class="span_foo span_bar">
<span id="dupL2">
<span id="dupL3">
<span id="dupL4">
<span id="dupL5"></span>
</span>
</span>
</span>
</span>
</div> <!-- #dupContainer -->
<div id="grandfather"> grandfather
<div id="father" class="brothers men"> father
<div id="son"> son </div>
</div>
<div id="uncle" class="brothers men"> uncle </div>
</div>
<form id="commaParent" title="commas,are,good" action="">
<p>
<input type="hidden" id="commaChild" name="foo" value="#commaOne,#commaTwo" />
<input type="hidden" id="commaTwo" name="foo2" value="oops" />
</p>
</form>
<div id="counted_container"><div class="is_counted"></div></div>
</div>
</div>
</body>
</html>

View File

@@ -0,0 +1,132 @@
<!DOCTYPE HTML>
<html lang="en-us">
<head>
<meta http-equiv="Content-type" content="text/html; charset=utf-8">
<title>Qwery tests</title>
<style type="text/css">
#fixtures {
position: absolute;
top: -9999px;
}
</style>
<link rel="stylesheet" href="../node_modules/sink-test/src/sink.css" type="text/css">
<script src="../node_modules/sink-test/src/sink.js"></script>
<script src="../src/qwery.js"></script>
<script src="../pseudos/qwery-pseudos.js"></script>
<script type="text/javascript">
var Q = qwery
</script>
</head>
<body>
<h1>Qwery Tests</h1>
<div id="fixtures">
<ol id="list">
<li>hello</li>
<li>world</li>
<ol>
<li>world</li>
<li id="attr-child-boosh" attr="boosh">hello</li>
</ol>
<li>humans</li>
</ol>
<div id="spaced-tokens">
<p><em><a href="#"></a></em></p>
<p></p>
</div>
<div id="pseudos">
<div class="odd pseudos pseudo-1"></div>
<div class="even pseudos pseudo-2"></div>
<div class="odd"></div>
<div class="even"></div>
<a class="odd"></a>
<div class="even"></div>
<div class="odd"></div>
</div>
<div foo="bar"></div>
<div class="a"></div>
<div class="class-with-dashes"></div>
<div id="boosh">
<!-- comment -->
<!-- comment -->
<div class="a b">
<div class="d e" test="fg" id="booshTest"></div>
<!-- comment -->
<em nopass="copyrighters" rel="copyright booshrs" test="f g"></em>
<span class="h i a"></span>
</div>
<!-- comment -->
</div>
<div id="lonelyBoosh"></div>
<div id="attr-test1" -data-attr></div>
<div id="attr-test2" -data-attr></div>
<div id="attr-test3" class="found you" -data-attr title="whatup duders"></div>
<div id="attributes">
<div test="one" unique-test="baz" id="attr-test-1"></div>
<div test="two-foo" id="attr-test-2"></div>
<div test=" three " id="attr-test-3"></div>
<a href="#aname" id="attr-test-4">aname</a>
</div>
<div class="idless">
<div class="tokens" title="one" id="token-one"></div>
<div class="tokens" title="one two" id="token-two"></div>
<div class="tokens" title="one two three #%" id="token-three">
<a href="foo" id="token-four">
<div id="token-five"></div>
</a>
</div>
</div>
<div id="order-matters" class="order-matters">
<p class="order-matters"></p>
<a class="order-matters">
<em class="order-matters"></em><b class="order-matters"></b>
</a>
</div>
<div id="direct-descend" class="oogabooga">
<div></div>
<div class="direct-descend">
<span></span>
<div class="direct-descend">
<div class="lvl2" id="toodeep"><span></span></div>
</div>
<div class="direct-descend"><span></span></div>
<div class="lvl2" id="l2">
<span></span>
<div class="direct-descend"><span></span></div>
</div>
<div class="lvl2" id="l3"></div>
</div>
<div class="ignoreme"></div>
<div class="direct-descend">
<div class="direct-descend"></div>
<div class="lvl2" id="l4"></div>
</div>
<div></div>
</div>
<div id="sibling-selector"></div>
<div class="sibling-selector" id="sib1">
<div class="sibling-selector"></div>
<div class="sibling-selector"></div>
</div>
<div class="sibling-selector" id="sib2">
<div class="sibling-selector">
<div class="sibling-selector"></div>
</div>
</div>
<div class="parent">
<h1 class="sibling oldest"></h1>
<h2 class="sibling older"></h2>
<h3 class="sibling middle"></h3>
<h4 class="sibling younger"></h4>
<h5 class="sibling youngest"></h5>
</div>
<form>
<button></button>
<input type="text">
<input type="hidden">
</form>
</div>
<ol id="tests"></ol>
<iframe id="frame" style="width: 0; height: 0; margin-left: -1000px;"></iframe>
<script src="tests.js"></script>
</body>
</html>

View File

@@ -0,0 +1,548 @@
"use strict";
var expect = require("expect.js"),
DomUtils = require("htmlparser2").DomUtils,
helper = require("../helper.js"),
document = helper.getDOM(require("fs").readFileSync(__dirname + "/index.html")+""),
CSSselect = helper.CSSselect;
//Local stand-in for a `location` object; the :target pseudo below reads its hash.
var location = {hash: ""};
//Registers :target: an element matches when it has attribs and its id equals
//location.hash with the first character removed.
CSSselect.pseudos.target = function(elem){
	var attribs = elem.attribs;
	return attribs && attribs.id === location.hash.substr(1);
};
//---
/*
The following is taken from https://github.com/ded/qwery/blob/master/tests/tests.js
*/
//Custom :humanoid pseudo for the tests: matches elements that satisfy
//li:contains(human) or ol:contains(human). The second parameter is unused.
CSSselect.pseudos.humanoid = function(e, v) {
	var isHumanItem = CSSselect.is(e, 'li:contains(human)');
	return isHumanItem || CSSselect.is(e, 'ol:contains(human)');
};
//Extra DOM parsed from inline markup, separate from index.html.
var frag = helper.getDOM([
	'<div class="d i v">',
	'<p id="oooo"><em></em><em id="emem"></em></p>',
	'</div>',
	'<p id="sep">',
	'<div class="a"><span></span></div>',
	'</p>'
].join(''));
//A second inline DOM containing #hsoob and #lonelyHsoob.
var doc = helper.getDOM([
	'<div id="hsoob">',
	'<div class="a b">',
	'<div class="d e sib" test="fg" id="booshTest"><p><span id="spanny"></span></p></div>',
	'<em nopass="copyrighters" rel="copyright booshrs" test="f g" class="sib"></em>',
	'<span class="h i a sib"></span>',
	'</div>',
	'<p class="odd"></p>',
	'</div>',
	'<div id="lonelyHsoob"></div>'
].join(''));
//Handles into the index.html document: #attr-child-boosh and the children of #pseudos.
var el = DomUtils.getElementById('attr-child-boosh', document);
var pseudos = DomUtils.getElementById('pseudos', document).children;
module.exports = {
//Queries run with an explicit context as the second argument to CSSselect:
//either the whole document or the result of another CSSselect call. Two
//upstream tests (string contexts and a larger result-as-context set) are
//commented out and do not run.
'Contexts': {
'should be able to pass optional context': function () {
expect(CSSselect('.a', document)).to.have.length(3); //no context found 3 elements (.a)
expect(CSSselect('.a', CSSselect('#boosh', document))).to.have.length(2); //context found 2 elements (#boosh .a)
},
/*
'should be able to pass string as context': function() {
expect(CSSselect('.a', '#boosh')).to.have.length(2); //context found 2 elements(.a, #boosh)
expect(CSSselect('.a', '.a')).to.be.empty(); //context found 0 elements(.a, .a)
expect(CSSselect('.a', '.b')).to.have.length(1); //context found 1 elements(.a, .b)
expect(CSSselect('.a', '#boosh .b')).to.have.length(1); //context found 1 elements(.a, #boosh .b)
expect(CSSselect('.b', '#boosh .b')).to.be.empty(); //context found 0 elements(.b, #boosh .b)
},
*/
/*
'should be able to pass qwery result as context': function() {
expect(CSSselect('.a', CSSselect('#boosh', document))).to.have.length(2); //context found 2 elements(.a, #boosh)
expect(CSSselect('.a', CSSselect('.a', document))).to.be.empty(); //context found 0 elements(.a, .a)
expect(CSSselect('.a', CSSselect('.b', document))).to.have.length(1); //context found 1 elements(.a, .b)
expect(CSSselect('.a', CSSselect('#boosh .b', document))).to.have.length(1); //context found 1 elements(.a, #boosh .b)
expect(CSSselect('.b', CSSselect('#boosh .b', document))).to.be.empty(); //context found 0 elements(.b, #boosh .b)
},
*/
'should not return duplicates from combinators': function () {
//The same element matched by several comma-separated groups must appear once
expect(CSSselect('#boosh,#boosh', document)).to.have.length(1); //two booshes dont make a thing go right
expect(CSSselect('#boosh,.apples,#boosh', document)).to.have.length(1); //two booshes and an apple dont make a thing go right
},
'byId sub-queries within context': function() {
//All queries below use the #boosh, #booshTest or #lonelyBoosh result as context
expect(CSSselect('#booshTest', CSSselect('#boosh', document))).to.have.length(1); //found "#id #id"
expect(CSSselect('.a.b #booshTest', CSSselect('#boosh', document))).to.have.length(1); //found ".class.class #id"
expect(CSSselect('.a>#booshTest', CSSselect('#boosh', document))).to.have.length(1); //found ".class>#id"
expect(CSSselect('>.a>#booshTest', CSSselect('#boosh', document))).to.have.length(1); //found ">.class>#id"
expect(CSSselect('#boosh', CSSselect('#booshTest', document)).length).to.not.be.ok(); //shouldn't find #boosh (ancestor) within #booshTest (descendent)
expect(CSSselect('#boosh', CSSselect('#lonelyBoosh', document)).length).to.not.be.ok(); //shouldn't find #boosh within #lonelyBoosh (unrelated)
}
},
//Id, class and type selectors plus descendant/child chains, run against the
//index.html document. Most checks assert only on the number of results.
'CSS 1': {
'get element by id': function () {
var result = CSSselect('#boosh', document);
expect(result[0]).to.be.ok(); //found element with id=boosh
expect(CSSselect('h1', document)[0]).to.be.ok(); //found 1 h1
},
'byId sub-queries': function() {
expect(CSSselect('#boosh #booshTest', document)).to.have.length(1); //found "#id #id"
expect(CSSselect('.a.b #booshTest', document)).to.have.length(1); //found ".class.class #id"
expect(CSSselect('#boosh>.a>#booshTest', document)).to.have.length(1); //found "#id>.class>#id"
expect(CSSselect('.a>#booshTest', document)).to.have.length(1); //found ".class>#id"
},
'get elements by class': function () {
expect(CSSselect('#boosh .a', document)).to.have.length(2); //found two elements
expect(CSSselect('#boosh div.a', document)[0]).to.be.ok(); //found one element
expect(CSSselect('#boosh div', document)).to.have.length(2); //found two {div} elements
expect(CSSselect('#boosh span', document)[0]).to.be.ok(); //found one {span} element
expect(CSSselect('#boosh div div', document)[0]).to.be.ok(); //found a single div
expect(CSSselect('a.odd', document)).to.have.length(1); //found single a
},
'combos': function () {
expect(CSSselect('#boosh div,#boosh span', document)).to.have.length(3); //found 2 divs and 1 span
},
'class with dashes': function() {
expect(CSSselect('.class-with-dashes', document)).to.have.length(1); //found something
},
'should ignore comment nodes': function() {
expect(CSSselect('#boosh *', document)).to.have.length(4); //found only 4 elements under #boosh
},
'deep messy relationships': function() {
// these are mostly characterised by a combination of tight relationships and loose relationships
// on the right side of the query it's easy to find matches but they tighten up quickly as you
// go to the left
// they are useful for making sure the dom crawler doesn't stop short or over-extend as it works
// up the tree the crawl needs to be comprehensive
expect(CSSselect('div#fixtures > div a', document)).to.have.length(5); //found five results for "div#fixtures > div a"
expect(CSSselect('.direct-descend > .direct-descend .lvl2', document)).to.have.length(1); //found one result for ".direct-descend > .direct-descend .lvl2"
//NOTE(review): the next two assertions are identical as they appear here
expect(CSSselect('.direct-descend > .direct-descend div', document)).to.have.length(1); //found one result for ".direct-descend > .direct-descend div"
expect(CSSselect('.direct-descend > .direct-descend div', document)).to.have.length(1); //found one result for ".direct-descend > .direct-descend div"
expect(CSSselect('div#fixtures div ~ a div', document)).to.be.empty(); //found no results for odd query
expect(CSSselect('.direct-descend > .direct-descend > .direct-descend ~ .lvl2', document)).to.be.empty(); //found no results for another odd query
}
},
//Attribute selectors combined with ids, classes and type selectors, run
//against the index.html document.
'CSS 2': {
'get elements by attribute': function () {
var wanted = CSSselect('#boosh div[test]', document)[0];
var expected = DomUtils.getElementById('booshTest', document);
expect(wanted).to.be(expected); //found attribute
expect(CSSselect('#boosh div[test=fg]', document)[0]).to.be(expected); //found attribute with value
expect(CSSselect('em[rel~="copyright"]', document)).to.have.length(1); //found em[rel~="copyright"]
//"copyright" is not a whole word of nopass="copyrighters", so ~= must find nothing
expect(CSSselect('em[nopass~="copyright"]', document)).to.be.empty(); //found em[nopass~="copyright"]
},
'should not throw error by attribute selector': function () {
expect(CSSselect('[foo^="bar"]', document)).to.have.length(1); //found 1 element
},
'crazy town': function () {
//Local `el` shadows the module-level `el` inside this test only
var el = DomUtils.getElementById('attr-test3', document);
expect(CSSselect('div#attr-test3.found.you[title="whatup duders"]', document)[0]).to.be(el); //found the right element
}
},
'attribute selectors': {
/* CSS 2 SPEC */
'[attr]': function () {
var expected = DomUtils.getElementById('attr-test-1', document);
expect(CSSselect('#attributes div[unique-test]', document)[0]).to.be(expected); //found attribute with [attr]
},
'[attr=val]': function () {
var expected = DomUtils.getElementById('attr-test-2', document);
expect(CSSselect('#attributes div[test="two-foo"]', document)[0]).to.be(expected); //found attribute with =
expect(CSSselect("#attributes div[test='two-foo']", document)[0]).to.be(expected); //found attribute with =
expect(CSSselect('#attributes div[test=two-foo]', document)[0]).to.be(expected); //found attribute with =
},
'[attr~=val]': function () {
var expected = DomUtils.getElementById('attr-test-3', document);
expect(CSSselect('#attributes div[test~=three]', document)[0]).to.be(expected); //found attribute with ~=
},
'[attr|=val]': function () {
var expected = DomUtils.getElementById('attr-test-2', document);
expect(CSSselect('#attributes div[test|="two-foo"]', document)[0]).to.be(expected); //found attribute with |=
expect(CSSselect('#attributes div[test|=two]', document)[0]).to.be(expected); //found attribute with |=
},
'[href=#x] special case': function () {
var expected = DomUtils.getElementById('attr-test-4', document);
expect(CSSselect('#attributes a[href="#aname"]', document)[0]).to.be(expected); //found attribute with href=#x
},
/* CSS 3 SPEC */
'[attr^=val]': function () {
var expected = DomUtils.getElementById('attr-test-2', document);
expect(CSSselect('#attributes div[test^=two]', document)[0]).to.be(expected); //found attribute with ^=
},
'[attr$=val]': function () {
var expected = DomUtils.getElementById('attr-test-2', document);
expect(CSSselect('#attributes div[test$=foo]', document)[0]).to.be(expected); //found attribute with $=
},
'[attr*=val]': function () {
var expected = DomUtils.getElementById('attr-test-3', document);
expect(CSSselect('#attributes div[test*=hree]', document)[0]).to.be(expected); //found attribute with *=
},
'direct descendants': function () {
expect(CSSselect('#direct-descend > .direct-descend', document)).to.have.length(2); //found two direct descendents
expect(CSSselect('#direct-descend > .direct-descend > .lvl2', document)).to.have.length(3); //found three second-level direct descendents
},
'sibling elements': function () {
expect(CSSselect('#sibling-selector ~ .sibling-selector', document)).to.have.length(2); //found two siblings
expect(CSSselect('#sibling-selector ~ div.sibling-selector', document)).to.have.length(2); //found two siblings
expect(CSSselect('#sibling-selector + div.sibling-selector', document)).to.have.length(1); //found one sibling
expect(CSSselect('#sibling-selector + .sibling-selector', document)).to.have.length(1); //found one sibling
expect(CSSselect('.parent .oldest ~ .sibling', document)).to.have.length(4); //found four younger siblings
expect(CSSselect('.parent .middle ~ .sibling', document)).to.have.length(2); //found two younger siblings
expect(CSSselect('.parent .middle ~ h4', document)).to.have.length(1); //found next sibling by tag
expect(CSSselect('.parent .middle ~ h4.younger', document)).to.have.length(1); //found next sibling by tag and class
expect(CSSselect('.parent .middle ~ h3', document)).to.be.empty(); //an element can't be its own sibling
expect(CSSselect('.parent .middle ~ h2', document)).to.be.empty(); //didn't find an older sibling
expect(CSSselect('.parent .youngest ~ .sibling', document)).to.be.empty(); //found no younger siblings
expect(CSSselect('.parent .oldest + .sibling', document)).to.have.length(1); //found next sibling
expect(CSSselect('.parent .middle + .sibling', document)).to.have.length(1); //found next sibling
expect(CSSselect('.parent .middle + h4', document)).to.have.length(1); //found next sibling by tag
expect(CSSselect('.parent .middle + h3', document)).to.be.empty(); //an element can't be its own sibling
expect(CSSselect('.parent .middle + h2', document)).to.be.empty(); //didn't find an older sibling
expect(CSSselect('.parent .youngest + .sibling', document)).to.be.empty(); //found no younger siblings
}
},
/*
'Uniq': {
'duplicates arent found in arrays': function () {
expect(CSSselect.uniq(['a', 'b', 'c', 'd', 'e', 'a', 'b', 'c', 'd', 'e'])).to.have.length(5); //result should be a, b, c, d, e
expect(CSSselect.uniq(['a', 'b', 'c', 'c', 'c'])).to.have.length(3); //result should be a, b, c
}
},
*/
'element-context queries': {
/*
'relationship-first queries': function() {
expect(CSSselect('> .direct-descend', CSSselect('#direct-descend', document))).to.have.length(2); //found two direct descendents using > first
expect(CSSselect('~ .sibling-selector', CSSselect('#sibling-selector', document))).to.have.length(2); //found two siblings with ~ first
expect(CSSselect('+ .sibling-selector', CSSselect('#sibling-selector', document))).to.have.length(1); //found one sibling with + first
expect(CSSselect('> .tokens a', CSSselect('.idless', document)[0])).to.have.length(1); //found one sibling from a root with no id
},
*/
// should be able to query on an element that hasn't been inserted into the dom
'detached fragments': function() {
expect(CSSselect('.a span', frag)).to.have.length(1); //should find child elements of fragment
//expect(CSSselect('> div p em', frag)).to.have.length(2); //should find child elements of fragment, relationship first
},
'byId sub-queries within detached fragment': function () {
expect(CSSselect('#emem', frag)).to.have.length(1); //found "#id" in fragment
expect(CSSselect('.d.i #emem', frag)).to.have.length(1); //found ".class.class #id" in fragment
expect(CSSselect('.d #oooo #emem', frag)).to.have.length(1); //found ".class #id #id" in fragment
//expect(CSSselect('> div #oooo', frag)).to.have.length(1); //found "> .class #id" in fragment
expect(CSSselect('#oooo', CSSselect('#emem', frag)).length).to.not.be.ok(); //shouldn't find #oooo (ancestor) within #emem (descendent)
expect(CSSselect('#sep', CSSselect('#emem', frag)).length).to.not.be.ok(); //shouldn't find #sep within #emem (unrelated)
},
/*
'exclude self in match': function() {
expect(CSSselect('.order-matters', CSSselect('#order-matters', document))).to.have.length(4); //should not include self in element-context queries
},
*/
// because form's have .length
'forms can be used as contexts': function() {
expect(CSSselect('*', CSSselect('form', document)[0])).to.have.length(3); //found 3 elements under &lt;form&gt;
}
},
'tokenizer': {
'should not get weird tokens': function () {
// Attribute values containing spaces and the characters '#' and '%' must be kept
// intact by the tokenizer; every query has to resolve to the element with the matching id.
expect(CSSselect('div .tokens[title="one"]', document)[0]).to.be(DomUtils.getElementById('token-one', document)); //found div .tokens[title="one"]
expect(CSSselect('div .tokens[title="one two"]', document)[0]).to.be(DomUtils.getElementById('token-two', document)); //found div .tokens[title="one two"]
expect(CSSselect('div .tokens[title="one two three #%"]', document)[0]).to.be(DomUtils.getElementById('token-three', document)); //found div .tokens[title="one two three #%"]
expect(CSSselect("div .tokens[title='one two three #%'] a", document)[0]).to.be(DomUtils.getElementById('token-four', document)); //found div .tokens[title='one two three #%'] a
expect(CSSselect('div .tokens[title="one two three #%"] a[href$=foo] div', document)[0]).to.be(DomUtils.getElementById('token-five', document)); //found div .tokens[title="one two three #%"] a[href$=foo] div
}
},
'interesting syntaxes': {
'should parse bad selectors': function () {
expect(CSSselect('#spaced-tokens p em a', document).length).to.be.ok(); //found element with funny tokens
}
},
'order matters': {
// <div id="order-matters">
// <p class="order-matters"></p>
// <a class="order-matters">
// <em class="order-matters"></em><b class="order-matters"></b>
// </a>
// </div>
'the order of elements return matters': function () {
// Results must come back in the order p, a, em, b (see the fixture sketched above).
// Helper: lower-cased tag name of a node.
function tag(el) {
return el.name.toLowerCase();
}
var els = CSSselect('#order-matters .order-matters', document);
expect(tag(els[0])).to.be('p'); //first element matched is a {p} tag
expect(tag(els[1])).to.be('a'); //second element matched is an {a} tag
expect(tag(els[2])).to.be('em'); //third element matched is an {em} tag
expect(tag(els[3])).to.be('b'); //fourth element matched is a {b} tag
}
},
'pseudo-selectors': {
':contains': function() {
expect(CSSselect('li:contains(humans)', document)).to.have.length(1); //found by "element:contains(text)"
expect(CSSselect(':contains(humans)', document)).to.have.length(5); //found by ":contains(text)", including all ancestors
// * is an important case, can cause weird errors
expect(CSSselect('*:contains(humans)', document)).to.have.length(5); //found by "*:contains(text)", including all ancestors
expect(CSSselect('ol:contains(humans)', document)).to.have.length(1); //found by "ancestor:contains(text)"
},
':not': function() {
expect(CSSselect('.odd:not(div)', document)).to.have.length(1); //found one .odd :not an &lt;a&gt;
},
':first-child': function () {
expect(CSSselect('#pseudos div:first-child', document)[0]).to.be(pseudos[0]); //found first child
expect(CSSselect('#pseudos div:first-child', document)).to.have.length(1); //found only 1
},
':last-child': function () {
var all = DomUtils.getElementsByTagName('div', pseudos);
expect(CSSselect('#pseudos div:last-child', document)[0]).to.be(all[all.length - 1]); //found last child
expect(CSSselect('#pseudos div:last-child', document)).to.have.length(1); //found only 1
},
'ol > li[attr="boosh"]:last-child': function () {
var expected = DomUtils.getElementById('attr-child-boosh', document);
expect(CSSselect('ol > li[attr="boosh"]:last-child', document)).to.have.length(1); //only 1 element found
expect(CSSselect('ol > li[attr="boosh"]:last-child', document)[0]).to.be(expected); //found correct element
},
':nth-child(odd|even|x)': function () {
var second = DomUtils.getElementsByTagName('div', pseudos)[1];
expect(CSSselect('#pseudos :nth-child(odd)', document)).to.have.length(4); //found 4 odd elements
expect(CSSselect('#pseudos div:nth-child(odd)', document)).to.have.length(3); //found 3 odd elements with div tag
expect(CSSselect('#pseudos div:nth-child(even)', document)).to.have.length(3); //found 3 even elements with div tag
expect(CSSselect('#pseudos div:nth-child(2)', document)[0]).to.be(second); //found 2nd nth-child of pseudos
},
':nth-child(expr)': function () {
var fifth = DomUtils.getElementsByTagName('a', pseudos)[0];
var sixth = DomUtils.getElementsByTagName('div', pseudos)[4];
expect(CSSselect('#pseudos :nth-child(3n+1)', document)).to.have.length(3); //found 3 elements
expect(CSSselect('#pseudos :nth-child(+3n-2)', document)).to.have.length(3); //found 3 elements'
expect(CSSselect('#pseudos :nth-child(-n+6)', document)).to.have.length(6); //found 6 elements
expect(CSSselect('#pseudos :nth-child(-n+5)', document)).to.have.length(5); //found 5 elements
expect(CSSselect('#pseudos :nth-child(3n+2)', document)[1]).to.be(fifth); //second :nth-child(3n+2) is the fifth child
expect(CSSselect('#pseudos :nth-child(3n)', document)[1]).to.be(sixth); //second :nth-child(3n) is the sixth child
},
':nth-last-child(odd|even|x)': function () {
var second = DomUtils.getElementsByTagName('div', pseudos)[1];
expect(CSSselect('#pseudos :nth-last-child(odd)', document)).to.have.length(4); //found 4 odd elements
expect(CSSselect('#pseudos div:nth-last-child(odd)', document)).to.have.length(3); //found 3 odd elements with div tag
expect(CSSselect('#pseudos div:nth-last-child(even)', document)).to.have.length(3); //found 3 even elements with div tag
expect(CSSselect('#pseudos div:nth-last-child(6)', document)[0]).to.be(second); //6th nth-last-child should be 2nd of 7 elements
},
':nth-last-child(expr)': function () {
var third = DomUtils.getElementsByTagName('div', pseudos)[2];
expect(CSSselect('#pseudos :nth-last-child(3n+1)', document)).to.have.length(3); //found 3 elements
expect(CSSselect('#pseudos :nth-last-child(3n-2)', document)).to.have.length(3); //found 3 elements
expect(CSSselect('#pseudos :nth-last-child(-n+6)', document)).to.have.length(6); //found 6 elements
expect(CSSselect('#pseudos :nth-last-child(-n+5)', document)).to.have.length(5); //found 5 elements
expect(CSSselect('#pseudos :nth-last-child(3n+2)', document)[0]).to.be(third); //first :nth-last-child(3n+2) is the third child
},
':nth-of-type(expr)': function () {
var a = DomUtils.getElementsByTagName('a', pseudos)[0];
expect(CSSselect('#pseudos div:nth-of-type(3n+1)', document)).to.have.length(2); //found 2 div elements
expect(CSSselect('#pseudos a:nth-of-type(3n+1)', document)).to.have.length(1); //found 1 a element
expect(CSSselect('#pseudos a:nth-of-type(3n+1)', document)[0]).to.be(a); //found the right a element
expect(CSSselect('#pseudos a:nth-of-type(3n)', document)).to.be.empty(); //no matches for every third a
expect(CSSselect('#pseudos a:nth-of-type(odd)', document)).to.have.length(1); //found the odd a
expect(CSSselect('#pseudos a:nth-of-type(1)', document)).to.have.length(1); //found the first a
},
':nth-last-of-type(expr)': function () {
var second = DomUtils.getElementsByTagName('div', pseudos)[1];
expect(CSSselect('#pseudos div:nth-last-of-type(3n+1)', document)).to.have.length(2); //found 2 div elements
expect(CSSselect('#pseudos a:nth-last-of-type(3n+1)', document)).to.have.length(1); //found 1 a element
expect(CSSselect('#pseudos div:nth-last-of-type(5)', document)[0]).to.be(second); //5th nth-last-of-type should be 2nd of 7 elements
},
':first-of-type': function () {
expect(CSSselect('#pseudos a:first-of-type', document)[0]).to.be(DomUtils.getElementsByTagName('a', pseudos)[0]); //found first a element
expect(CSSselect('#pseudos a:first-of-type', document)).to.have.length(1); //found only 1
},
':last-of-type': function () {
var all = DomUtils.getElementsByTagName('div', pseudos);
expect(CSSselect('#pseudos div:last-of-type', document)[0]).to.be(all[all.length - 1]); //found last div element
expect(CSSselect('#pseudos div:last-of-type', document)).to.have.length(1); //found only 1
},
':only-of-type': function () {
expect(CSSselect('#pseudos a:only-of-type', document)[0]).to.be(DomUtils.getElementsByTagName('a', pseudos)[0]); //found the only a element
expect(CSSselect('#pseudos a:first-of-type', document)).to.have.length(1); //found only 1
},
':target': function () {
location.hash = '';
expect(CSSselect('#pseudos:target', document)).to.be.empty(); //#pseudos is not the target
location.hash = '#pseudos';
expect(CSSselect('#pseudos:target', document)).to.have.length(1); //now #pseudos is the target
location.hash = '';
},
'custom pseudos': function() {
// :humanoid implemented just for testing purposes
expect(CSSselect(':humanoid', document)).to.have.length(2); //selected using custom pseudo
}
},
/*
'argument types': {
'should be able to pass in nodes as arguments': function () {
var el = DomUtils.getElementById('boosh', document);
expect(CSSselect(el)[0]).to.be(el); //CSSselect(el)[0] == el
expect(CSSselect(el, 'body')[0]).to.be(el); //CSSselect(el, 'body')[0] == el
expect(CSSselect(el, document)[0]).to.be(el); //CSSselect(el, document)[0] == el
expect(CSSselect(window)[0]).to.be(window); //CSSselect(window)[0] == window
expect(CSSselect(document)[0]).to.be(document); //CSSselect(document)[0] == document
},
'should be able to pass in an array of results as arguments': function () {
var el = DomUtils.getElementById('boosh', document);
var result = CSSselect([CSSselect('#boosh', document), CSSselect(document), CSSselect(window)]);
expect(result).to.have.length(3); //3 elements in the combined set
expect(result[0]).to.be(el); //result[0] == el
expect(result[1]).to.be(document); //result[0] == document
expect(result[2]).to.be(window); //result[0] == window
expect(CSSselect([CSSselect('#pseudos div.odd', document), CSSselect('#pseudos div.even', document)])).to.have.length(6); //found all the odd and even divs
}
},
*/
'is()': {
'simple selectors': function () {
expect(CSSselect.is(el, 'li')).to.be.ok(); //tag
expect(CSSselect.is(el, '*')).to.be.ok(); //wildcard
expect(CSSselect.is(el, '#attr-child-boosh')).to.be.ok(); //#id
expect(CSSselect.is(el, '[attr]')).to.be.ok(); //[attr]
expect(CSSselect.is(el, '[attr=boosh]')).to.be.ok(); //[attr=val]
expect(CSSselect.is(el, 'div')).to.not.be.ok(); //wrong tag
expect(CSSselect.is(el, '#foo')).to.not.be.ok(); //wrong #id
expect(CSSselect.is(el, '[foo]')).to.not.be.ok(); //wrong [attr]
expect(CSSselect.is(el, '[attr=foo]')).to.not.be.ok(); //wrong [attr=val]
},
'selector sequences': function () {
expect(CSSselect.is(el, 'li#attr-child-boosh[attr=boosh]')).to.be.ok(); //tag#id[attr=val]
expect(CSSselect.is(el, 'div#attr-child-boosh[attr=boosh]')).to.not.be.ok(); //wrong tag#id[attr=val]
},
'selector sequences combinators': function () {
// CSSselect.is() with descendant, child (>) and adjacent-sibling (+) combinators;
// the trailing comment on each line gives the shape of the selector under test.
expect(CSSselect.is(el, 'ol li')).to.be.ok(); //tag tag
expect(CSSselect.is(el, 'ol>li')).to.be.ok(); //tag>tag
expect(CSSselect.is(el, 'ol>li+li')).to.be.ok(); //tag>tag+tag
expect(CSSselect.is(el, 'ol#list li#attr-child-boosh[attr=boosh]')).to.be.ok(); //tag#id tag#id[attr=val]
expect(CSSselect.is(el, 'ol#list>li#attr-child-boosh[attr=boosh]')).to.not.be.ok(); //wrong tag#id>tag#id[attr=val]
expect(CSSselect.is(el, 'ol ol li#attr-child-boosh[attr=boosh]')).to.be.ok(); //tag tag tag#id[attr=val]
expect(CSSselect.is(CSSselect('#token-four', document)[0], 'div#fixtures>div a')).to.be.ok(); //tag#id>tag tag where ambiguous middle tag requires backtracking
},
'pseudos': function() {
//TODO: more tests!
expect(CSSselect.is(el, 'li:contains(hello)')).to.be.ok(); //matching :contains(text)
expect(CSSselect.is(el, 'li:contains(human)')).to.not.be.ok(); //non-matching :contains(text)
expect(CSSselect.is(CSSselect('#list>li', document)[2], ':humanoid')).to.be.ok(); //matching custom pseudo
expect(CSSselect.is(CSSselect('#list>li', document)[1], ':humanoid')).to.not.be.ok(); //non-matching custom pseudo
}/*,
'context': function () {
expect(CSSselect.is(el, 'li#attr-child-boosh[attr=boosh]', CSSselect('#list', document)[0])).to.be.ok(); //context
expect(CSSselect.is(el, 'ol#list li#attr-child-boosh[attr=boosh]', CSSselect('#boosh', document)[0])).to.not.be.ok(); //wrong context
}*/
},
'selecting elements in other documents': {
'get element by id': function () {
var result = CSSselect('#hsoob', doc);
expect(result[0]).to.be.ok(); //found element with id=hsoob
},
'get elements by class': function () {
// Class and tag lookups against the secondary document `doc`.
expect(CSSselect('#hsoob .a', doc)).to.have.length(2); //found two elements
expect(CSSselect('#hsoob div.a', doc)[0]).to.be.ok(); //found at least one div.a
expect(CSSselect('#hsoob div', doc)).to.have.length(2); //found two {div} elements
expect(CSSselect('#hsoob span', doc)[0]).to.be.ok(); //found at least one {span} element
expect(CSSselect('#hsoob div div', doc)[0]).to.be.ok(); //found a nested div
expect(CSSselect('p.odd', doc)).to.have.length(1); //found single p.odd
},
'complex selectors': function () {
// Sibling (~, +), child (>) and attribute selectors against the secondary document `doc`.
expect(CSSselect('.d ~ .sib', doc)).to.have.length(2); //found two ~ siblings
expect(CSSselect('.a .d + .sib', doc)).to.have.length(1); //found one + sibling
expect(CSSselect('#hsoob > div > .h', doc)).to.have.length(1); //found span using child selectors
expect(CSSselect('.a .d ~ .sib[test="f g"]', doc)).to.have.length(1); //found 1 ~ sibling with test attribute
},
'byId sub-queries': function () {
expect(CSSselect('#hsoob #spanny', doc)).to.have.length(1); //found "#id #id" in frame
expect(CSSselect('.a #spanny', doc)).to.have.length(1); //found ".class #id" in frame
expect(CSSselect('.a #booshTest #spanny', doc)).to.have.length(1); //found ".class #id #id" in frame
//ok(CSSselect('> #hsoob', doc).length == 1, 'found "> #id" in frame') --> would be good to support this, needs some tweaking though
},
'byId sub-queries within sub-context': function () {
// Same id-based queries, but using the result of a previous query as the context
// instead of the whole document.
expect(CSSselect('#spanny', CSSselect('#hsoob', doc))).to.have.length(1); //found "#id -> #id" in frame
expect(CSSselect('.a #spanny', CSSselect('#hsoob', doc))).to.have.length(1); //found ".class #id" in frame
expect(CSSselect('.a #booshTest #spanny', CSSselect('#hsoob', doc))).to.have.length(1); //found ".class #id #id" in frame
expect(CSSselect('.a > #booshTest', CSSselect('#hsoob', doc))).to.have.length(1); //found ".class > #id" in frame
expect(CSSselect('#booshTest', CSSselect('#spanny', doc)).length).to.not.be.ok(); //shouldn't find #booshTest (ancestor) within #spanny (descendent)
expect(CSSselect('#booshTest', CSSselect('#lonelyHsoob', doc)).length).to.not.be.ok(); //shouldn't find #booshTest within #lonelyHsoob (unrelated)
}
}
};

View File

@@ -0,0 +1,76 @@
var helper = require("./helper.js"),
doc = helper.getFile("W3C_Selectors.html"),
CSSselect = require("../"),
soupselect = require("cheerio-soupselect"),
selectors = ["body", "div", "body div", "div p", "div > p", "div + p", "div ~ p", "div[class^=exa][class$=mple]", "div p a", "div, p, a", ".note", "div.example", "ul .tocline2", "div.example, div.note", "#title", "h1#title", "div #title", "ul.toc li.tocline2", "ul.toc > li.tocline2", "h1#title + div > p", "h1[id]:contains(Selectors)", "a[href][lang][class]", "div[class]", "div[class=example]", "div[class^=exa]", "div[class$=mple]", "div[class*=e]", "div[class|=dialog]", "div[class!=made_up]", "div[class~=example]"/*, "div:not(.example)", "p:contains(selectors)", "p:nth-child(even)", "p:nth-child(2n)", "p:nth-child(odd)", "p:nth-child(2n+1)", "p:nth-child(n)", "p:only-child", "p:last-child", "p:first-child"*/];
//the selector engines under comparison; each entry is invoked as engine(dom, selector)
//CSSselect.iterate is wrapped so that its two arguments are passed in swapped order
var engines = [function(a,b){return CSSselect.iterate(b,a);}, soupselect.select];
//returns true when the engines disagree about the elements matched by `rule`
//(used as a filter callback, hence the unused `index` parameter)
function testResult(rule, index){
    var results = engines.map(function(engine){
        return engine(doc, rule);
    });
    //compare every result set with the one produced by the previous engine
    //TODO: might be hard to debug with more engines
    var pos = 1;
    while(pos < results.length){
        var previous = results[pos - 1],
            current = results[pos];
        //a different number of matches is always a failure
        if(previous.length !== current.length) return true;
        for(var k = 0; k < current.length; k++){
            var element = current[k];
            //same element at the same position: nothing to check
            if(previous[k] === element) continue;
            //a different order is tolerated, a missing element is not
            if(previous.indexOf(element) === -1) return true;
        }
        pos++;
    }
    return false;
}
//correctness pass: report every selector for which the engines disagree
selectors.filter(testResult).forEach(function(rule){ print(rule, "failed!\n"); });
//NOTE: the process exits here with status 0 whether or not a selector failed,
//so everything below this line is currently unreachable
process.exit(0); //don't run speed tests
print("-----\n\nChecking performance\n\n");
//test the speed
var ben = require("ben");
//Benchmarks `rule` with a precompiled CSSselect query plus every entry of
//`engines` and prints one table row. Each timing is tagged:
//  + fastest, ! slowest, = closer to the slowest, ~ closer to the fastest
function testSpeed(rule){
    //pad the rule to a fixed column width; the width is clamped at 0 because
    //Array(n) throws a RangeError for negative n (rules longer than 28 chars)
    print(rule, Array(Math.max(28 - rule.length, 0)).join(" "));
    var results = engines
        .map(function(func){ return function(){ return func(doc, rule); }});
    //also add a precompiled CSSselect test
    var compiled = CSSselect(rule);
    results.unshift(function(){ return CSSselect.iterate(compiled, doc); });
    //replace every benchmark function by the value `ben` reports for it
    results = results.map(ben);
    var min = Math.min.apply(null, results);
    var max = Math.max.apply(null, results);
    results.forEach(function(result){
        if(result === min) return print(" +", result, "+");
        if(result === max) return print(" !", result, "!");
        if(Math.abs(result-min) > Math.abs(result-max)){
            return print(" =", result, "=");
        }
        print(" ~", result, "~");
    });
    print("\n");
}
//table header; the column order matches testSpeed's output:
//precompiled CSSselect ("pc") first, then the entries of `engines`
print("RULE ", "CSSselect (pc)", "CSSselect", "soupselect\n");
selectors.forEach(testSpeed);
//writes all arguments to stdout, separated by single spaces and
//without a trailing newline
function print(){
    var parts = [];
    for(var i = 0; i < arguments.length; i++){
        parts.push(arguments[i]);
    }
    process.stdout.write(parts.join(" "));
}

View File

@@ -0,0 +1,34 @@
{
"name": "cheerio-select",
"version": "0.0.3",
"description": "Selector engine for cheerio",
"keywords": [],
"author": {
"name": "Matt Mueller",
"email": "mattmuelle@gmail.com"
},
"dependencies": {
"CSSselect": "0.x"
},
"devDependencies": {
"mocha": "*",
"cheerio": "*",
"expect.js": "*",
"underscore": "*"
},
"main": "index",
"engines": {
"node": ">= 0.4.7"
},
"scripts": {
"test": "make test"
},
"readme": "\n# cheerio-select [![Build Status](https://secure.travis-ci.org/MatthewMueller/cheerio-select.png?branch=master)](http://travis-ci.org/MatthewMueller/cheerio-select)\n\n Tiny wrapper around FB55's excellent [CSSselect](https://github.com/FB55/CSSselect) library.\n\n cheerio-select provides a comprehensive test suite based on sizzle's test suite. \n\n > Warning: Currently, not all tests pass, and some sizzle features will not be supported\n\n## Usage\n\n var select = require('cheerio-select'),\n parse = require('cheerio').parse,\n dom = parse('<ul id = \"fruits\"><li class = \"apple\">Apple</li></ul>');\n\n select('#fruits > .apple', dom);\n => [{...}]\n\n## TODO \n\n* Get all the unit tests to pass!\n\n## Run tests\n\n npm install\n make test\n\n## License \n\n(The MIT License)\n\nCopyright (c) 2012 Matt Mueller &lt;mattmuelle@gmail.com&gt;\n\nPermission is hereby granted, free of charge, to any person obtaining\na copy of this software and associated documentation files (the\n'Software'), to deal in the Software without restriction, including\nwithout limitation the rights to use, copy, modify, merge, publish,\ndistribute, sublicense, and/or sell copies of the Software, and to\npermit persons to whom the Software is furnished to do so, subject to\nthe following conditions:\n\nThe above copyright notice and this permission notice shall be\nincluded in all copies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,\nEXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\nMERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.\nIN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY\nCLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,\nTORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE\nSOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.",
"readmeFilename": "Readme.md",
"_id": "cheerio-select@0.0.3",
"dist": {
"shasum": "84b5fc11cb2f2ab67bfa917439b918200721c3ce"
},
"_from": "cheerio-select@*",
"_resolved": "https://registry.npmjs.org/cheerio-select/-/cheerio-select-0.0.3.tgz"
}

11
node_modules/cheerio/node_modules/entities/LICENSE generated vendored Normal file
View File

@@ -0,0 +1,11 @@
Copyright (c) Felix Böhm
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
THIS IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS,
EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1 @@
{"amp;":"\u0026","apos;":"\u0027","gt;":"\u003e","lt;":"\u003c","quot;":"\u0022"}

79
node_modules/cheerio/node_modules/entities/index.js generated vendored Normal file
View File

@@ -0,0 +1,79 @@
//patterns for numeric character references; the "strict" variants require
//the terminating semicolon
var re_hex = /&#x[\da-f]+;?/gi,
    re_strictHex = /&#x[\da-f]+;/gi,
    re_charCode = /&#\d+;?/g,
    re_strictCharCode = /&#\d+;/g,
    //characters that the encoders replace by a numeric reference
    re_notUTF8 = /[\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff\u0370-\u037d\u037f-\u1fff\u200c\u200d\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]/g,
    fromCharCode = String.fromCharCode,
    //Converts a Unicode code point into a string.
    //String.fromCharCode only keeps the low 16 bits of its argument, so code
    //points above U+FFFF have to be emitted as a UTF-16 surrogate pair.
    //Values beyond U+10FFFF are not valid code points and yield U+FFFD.
    codePoint_func = function(codePoint){
        if(codePoint > 0x10FFFF) return "\uFFFD";
        if(codePoint > 0xFFFF){
            codePoint -= 0x10000;
            return fromCharCode(0xD800 | (codePoint >>> 10), 0xDC00 | (codePoint & 0x3FF));
        }
        return fromCharCode(codePoint);
    },
    //"&#255" / "&#255;" -> character (parseInt stops at the optional ";")
    num_func = function(num){return codePoint_func(parseInt(num.substr(2), 10));},
    //"&#xff" / "&#xff;" -> character
    hex_func = function(hex){return codePoint_func(parseInt(hex.substr(3), 16));},
    //same as above for matches of the strict patterns (always ";"-terminated)
    strictNum_func = function(num){return codePoint_func(parseInt(num.slice(2, -1), 10));},
    strictHex_func = function(num){return codePoint_func(parseInt(num.slice(3, -1), 16));},
    //character -> decimal reference built from its first UTF-16 code unit
    charCode_func = function(c){ return "&#" +c.charCodeAt(0) +";";};
var fetch = function(filename, inherits){
var obj = require("./entities/" +filename +".json");
if(inherits) for(var name in inherits) obj[name] = inherits[name];
var re = Object.keys(obj).sort().join("|").replace(/(\w+)\|\1;/g, "$1;?");
return {
func: function(name){
return obj[name.substr(1)];
},
re: new RegExp("&(?:" +re +")", "g"),
obj: obj
};
};
//Builds the encoder for an entity table (entity name -> replacement text).
//Returns
//  re:   a global RegExp matching every replacement text of the table
//  func: replacer for `re`, maps a matched text to "&" + entity name
var getReverse = function(obj){
    var hasOwn = Object.prototype.hasOwnProperty;
    var reverse = Object.keys(obj).reduce(function(reverse, name){
        var text = obj[name];
        //several names can share one text; the last one wins, except that an
        //unterminated legacy name ("amp") never replaces a terminated one ("amp;")
        var keepExisting = hasOwn.call(reverse, text) &&
            reverse[text].slice(-1) === ";" && name.slice(-1) !== ";";
        if(!keepExisting) reverse[text] = name;
        return reverse;
    }, {});
    var pattern = Object.keys(reverse)
        //longest text first, otherwise a multi-character text could never
        //match once its first character is an alternative of its own
        .sort(function(a, b){
            return b.length - a.length || (a < b ? -1 : a > b ? 1 : 0);
        })
        //escape regex metacharacters only; prefixing every text with "\" would
        //turn letters into escapes (e.g. "fj" -> "\fj" = form feed + "j")
        .map(function(text){ return text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); })
        .join("|");
    return {
        func: function(name){ return "&" +reverse[name]; },
        //an empty table gets a pattern that never matches
        re: new RegExp(pattern || "(?!)", "g")
    };
};
var modes = ["XML", "HTML4", "HTML5"];
module.exports = {
decode: function(data, level){
if(!modes[level]) level = 0;
return module.exports["decode" +modes[level]](data);
},
encode: function(data, level){
if(!modes[level]) level = 0;
return module.exports["encode" +modes[level]](data);
}
};
//Creates decode<MODE> and encode<MODE> on module.exports for every entry of `modes`.
//`tmp` holds the table of the previously processed mode and is passed to fetch()
//as `inherits`, so each mode also contains the entries of the modes before it.
var tmp;
modes.forEach(function(name){
var obj = fetch(name.toLowerCase(), tmp),
regex = obj.re,
func = obj.func;
tmp = obj.obj;
//decoding runs three passes: named entities, hexadecimal references, decimal references
//NOTE: every pass rescans the output of the previous one, so text produced by
//the named-entity pass (e.g. "&amp;#65;" -> "&#65;") is decoded again by the numeric passes
module.exports["decode" +name] = function(data){
return data
.replace(regex, func)
.replace(re_hex, hex_func)
.replace(re_charCode, num_func);
};
var reverse = getReverse(obj.obj),
reverse_re = reverse.re,
reverse_func = reverse.func;
//encoding: named entities first, then decimal references for what re_notUTF8 matches
module.exports["encode" +name] = function(data){
return data
.replace(reverse_re, reverse_func)
.replace(re_notUTF8, charCode_func);
};
});

View File

@@ -0,0 +1,29 @@
{
"name": "entities",
"version": "0.2.1",
"description": "Encode & decode XML/HTML entities with ease",
"author": {
"name": "Felix Boehm",
"email": "me@feedic.com"
},
"keywords": [
"html",
"xml",
"entity",
"encoding"
],
"main": "./index.js",
"repository": {
"type": "git",
"url": "git://github.com/fb55/node-entities.git"
},
"license": "BSD-like",
"readme": "#entities\n\nEn- & decoder for XML/HTML entities.\n\n####Features:\n* Focussed on ___speed___\n* Supports three levels of entities: __XML__, __HTML4__ & __HTML5__\n * Supports _char code_ entities (eg. `&#x55;`)\n * Special optimizations for XML: A more restrictive syntax allows faster parsing\n\n##How to…\n\n###…install `entities`\n\n npm install entities\n\n###…use `entities`\n\n```javascript\n//encoding\nrequire(\"entities\").encode(<str> data[, <int> level]);\n//decoding\nrequire(\"entities\").decode(<str> data[, <int> level]);\n```\n\nThe `level` attribute indicates what level of entities should be decoded (0 = XML, 1 = HTML4 and 2 = HTML5). The default is 0 (read: XML).\n\nThere are also methods to access the level directly. Just append the name of the level to the action and you're ready to go (e.g. `encodeHTML4(data)`, `decodeXML(data)`).\n\n##TODO\n* There should be a way to remove tables that aren't used. The HTML5 table is pretty heavy, if it's not needed, it shouldn't be kept in memory.",
"readmeFilename": "readme.md",
"_id": "entities@0.2.1",
"dist": {
"shasum": "7aae886864887067f79f252a04c45309f4ac7980"
},
"_from": "entities@0.x",
"_resolved": "https://registry.npmjs.org/entities/-/entities-0.2.1.tgz"
}

31
node_modules/cheerio/node_modules/entities/readme.md generated vendored Normal file
View File

@@ -0,0 +1,31 @@
#entities
En- & decoder for XML/HTML entities.
####Features:
* Focussed on ___speed___
* Supports three levels of entities: __XML__, __HTML4__ & __HTML5__
* Supports _char code_ entities (e.g. `&#x55;`)
* Special optimizations for XML: A more restrictive syntax allows faster parsing
##How to…
###…install `entities`
npm install entities
###…use `entities`
```javascript
//encoding
require("entities").encode(<str> data[, <int> level]);
//decoding
require("entities").decode(<str> data[, <int> level]);
```
The optional `level` argument selects which set of entities is used for encoding or decoding (0 = XML, 1 = HTML4 and 2 = HTML5). The default is 0 (read: XML).
There are also methods to access the level directly. Just append the name of the level to the action and you're ready to go (e.g. `encodeHTML4(data)`, `decodeXML(data)`).
##TODO
* There should be a way to remove tables that aren't used. The HTML5 table is pretty heavy, if it's not needed, it shouldn't be kept in memory.

23
node_modules/cheerio/node_modules/entities/test.js generated vendored Normal file
View File

@@ -0,0 +1,23 @@
//Smoke test and micro benchmark for the entities module:
//checks one encode/decode round trip, then times both directions with `ben`.
var ben = require("ben"),
    entities = require("./"),
    decode = entities.decodeXML,
    encode = entities.encode,
    decoded = "asdf & ÿ ü '",
    encoded = encode(decoded);

//throws when `actual` is not exactly `expected`
function expectOutput(actual, expected){
    if(actual !== expected){
        throw Error("Unexpected output: " + actual);
    }
}

expectOutput(encode(decoded), "asdf &amp; &#255; &#252; &apos;");

//200 repetitions of the sample as benchmark input
var tmp = Array(201).join(decoded);
console.log("Encoding:", ben(function(){ encode(tmp); }));

//`decode` is decodeXML here; the second argument is kept from the original calls
expectOutput(decode(encoded, 2), decoded);

tmp = Array(201).join(encoded);
console.log("Decoding:", ben(function(){ decode(tmp, 2); }));

View File

@@ -0,0 +1,5 @@
language: node_js
node_js:
- 0.6
- 0.8
- 0.9

18
node_modules/cheerio/node_modules/htmlparser2/LICENSE generated vendored Normal file
View File

@@ -0,0 +1,18 @@
Copyright 2010, 2011, Chris Winberry <chris@winberry.net>. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE.

View File

@@ -0,0 +1,72 @@
#htmlparser2 [![Build Status](https://secure.travis-ci.org/fb55/node-htmlparser.png)](http://travis-ci.org/fb55/node-htmlparser)
A forgiving HTML/XML/RSS parser written in JS for NodeJS. The parser can handle streams (chunked data) and supports custom handlers for writing custom DOMs/output.
##Installing
npm install htmlparser2
##Usage
```javascript
var htmlparser = require("htmlparser2");
var parser = new htmlparser.Parser({
onopentag: function(name, attribs){
if(name === "script" && attribs.type === "text/javascript"){
console.log("JS! Hooray!");
}
},
ontext: function(text){
console.log("-->", text);
},
onclosetag: function(tagname){
if(tagname === "script"){
console.log("That's it?!");
}
}
});
parser.write("Xyz <script language= javascript>var foo = '<<bar>>';< / script>");
parser.done();
```
Output (simplified):
```javascript
--> Xyz
JS! Hooray!
--> var foo = '<<bar>>';
That's it?!
```
Read more about the parser in the [wiki](https://github.com/FB55/node-htmlparser/wiki/Parser-options).
##Get a DOM
The `DomHandler` (known as `DefaultHandler` in the original `htmlparser` module) produces a DOM (document object model) that can be manipulated using the `DomUtils` helper.
The `DomHandler`, while still bundled with this module, was recently moved to its [own module](https://github.com/FB55/domhandler). Have a look at it for further information.
##Parsing RSS/RDF/Atom Feeds
```javascript
new htmlparser.FeedHandler(function(<error> error, <object> feed){
...
});
```
##Performance
Using a slightly modified version of [node-expat](https://github.com/astro/node-expat)'s `bench.js`, I received the following results (on a MacBook (late 2010)):
* [htmlparser](https://github.com/tautologistics/node-htmlparser): 51779 el/s
* [sax.js](https://github.com/isaacs/sax-js): 53169 el/s
* [node-expat](https://github.com/astro/node-expat): 103388 el/s
* [htmlparser2](https://github.com/fb55/node-htmlparser): 118614 el/s
The test may be found in `tests/bench.js`.
##How is this different from [node-htmlparser](https://github.com/tautologistics/node-htmlparser)?
This is a fork of the project above. The main difference is that this is intended to be used only with node (it runs on other platforms using [browserify](https://github.com/substack/node-browserify)). Besides, the code is much better structured, has less duplication and is remarkably faster than the original.
The parser now provides a callback interface close to [sax.js](https://github.com/isaacs/sax-js) (originally targeted at [readabilitySAX](https://github.com/fb55/readabilitysax)). I also fixed a couple of bugs & included some pull requests for the original project (eg. [RDF feed support](https://github.com/tautologistics/node-htmlparser/pull/35)).
The support for location data and verbose output was removed a couple of versions ago. It's still available in the [verbose branch](https://github.com/FB55/node-htmlparser/tree/verbose).
The `DefaultHandler` and the `RssHandler` were renamed to clarify their purpose (to `DomHandler` and `FeedHandler`). The old names are still available when requiring `htmlparser2`, so your code should work as expected.

View File

@@ -0,0 +1,10 @@
//Types of elements found in the DOM
//Every node kind is identified by a small integer constant.
module.exports = {
Text: 0, //Text
Directive: 1, //<? ... ?>
Comment: 2, //<!-- ... -->
Script: 3, //<script> tags
Style: 4, //<style> tags
Tag: 5, //Any tag
CDATA: 6 //<![CDATA[ ... ]]>
};

View File

@@ -0,0 +1,87 @@
var index = require("./index.js"),
DomHandler = index.DomHandler,
DomUtils = index.DomUtils;
//TODO: make this a streamable handler
//Handler that collects a DOM through DomHandler (with ignoreWhitespace
//enabled) and turns it into a feed object once parsing ends (see onend).
//`callback` is handed to DomHandler unchanged.
function FeedHandler(callback){
this.init(callback, { ignoreWhitespace: true });
}
require("util").inherits(FeedHandler, DomHandler);
//Expose the DomHandler constructor as `init` so the constructor above can
//run it on the new instance
FeedHandler.prototype.init = DomHandler;
//Looks up elements through DomUtils.getElementsByTagName.
//When `one` is truthy the lookup is limited to one result and that single
//element (or undefined) is returned; otherwise the whole result list is.
function getElements(what, where, one, recurse){
	if(!one){
		return DomUtils.getElementsByTagName(what, where, recurse);
	}
	var limited = DomUtils.getElementsByTagName(what, where, recurse, 1);
	return limited[0];
}
//Returns the `data` of the first child of the first element matching `what`,
//or false when there is no match or the match has no children.
function fetch(what, where, recurse){
	var found = DomUtils.getElementsByTagName(what, where, recurse, 1);
	if(!(found.length > 0)) return false;
	var kids = found[0].children;
	if(!(kids.length > 0)) return false;
	return kids[0].data;
}
//True for the tag names accepted as the root element of a feed
var isValidFeed = function(value) {
	switch(value){
		case "rss":
		case "feed":
		case "rdf:RDF":
			return true;
		default:
			return false;
	}
};
//Called when parsing has finished. Replaces this.dom with a plain feed
//object built from the collected DOM and then invokes DomHandler's callback
//handling: with null when a feed root was found, otherwise with an Error.
//
//Feed object: type ("atom", or the first three characters of the root name),
//id, title, link, description, updated (Date), author and items; each item
//may carry id, title, link, description and pubDate (Date). Properties whose
//source element is missing are simply left out.
FeedHandler.prototype.onend = function() {
	var feed = {},
		feedRoot = getElements(isValidFeed, this.dom, true),
		tmp, childs, channel;

	if (feedRoot) {
		if(feedRoot.name === "feed"){
			//Atom: the metadata elements are direct children of <feed>
			childs = feedRoot.children;
			feed.type = "atom";
			if(tmp = fetch("id", childs)) feed.id = tmp;
			if(tmp = fetch("title", childs)) feed.title = tmp;
			if((tmp = getElements("link", childs, true)) && (tmp = tmp.attribs) && (tmp = tmp.href)) feed.link = tmp;
			if(tmp = fetch("subtitle", childs)) feed.description = tmp;
			if(tmp = fetch("updated", childs)) feed.updated = new Date(tmp);
			if(tmp = fetch("email", childs, true)) feed.author = tmp;
			feed.items = getElements("entry", childs).map(function(item){
				var entry = {}, tmp;
				item = item.children;
				if(tmp = fetch("id", item)) entry.id = tmp;
				if(tmp = fetch("title", item)) entry.title = tmp;
				if((tmp = getElements("link", item, true)) && (tmp = tmp.attribs) && (tmp = tmp.href)) entry.link = tmp;
				if(tmp = fetch("summary", item)) entry.description = tmp;
				if(tmp = fetch("updated", item)) entry.pubDate = new Date(tmp);
				return entry;
			});
		} else{
			//RSS/RDF: the metadata lives inside <channel>. A malformed feed may
			//lack that element; treat it as an empty channel instead of throwing
			//a TypeError from inside the parser.
			channel = getElements("channel", feedRoot.children, true);
			childs = channel ? channel.children : [];
			feed.type = feedRoot.name.substr(0, 3);
			feed.id = "";
			if(tmp = fetch("title", childs)) feed.title = tmp;
			if(tmp = fetch("link", childs)) feed.link = tmp;
			if(tmp = fetch("description", childs)) feed.description = tmp;
			if(tmp = fetch("lastBuildDate", childs)) feed.updated = new Date(tmp);
			if(tmp = fetch("managingEditor", childs)) feed.author = tmp;
			//items are looked up from the root, not from <channel>
			feed.items = getElements("item", feedRoot.children).map(function(item){
				var entry = {}, tmp;
				item = item.children;
				if(tmp = fetch("guid", item)) entry.id = tmp;
				if(tmp = fetch("title", item)) entry.title = tmp;
				if(tmp = fetch("link", item)) entry.link = tmp;
				if(tmp = fetch("description", item)) entry.description = tmp;
				if(tmp = fetch("pubDate", item)) entry.pubDate = new Date(tmp);
				return entry;
			});
		}
	}
	this.dom = feed;
	DomHandler.prototype._handleCallback.call(
		this, feedRoot ? null : Error("couldn't find root of feed")
	);
};
module.exports = FeedHandler;

View File

@@ -0,0 +1,397 @@
var ElementType = require("./ElementType.js");
//Creates a parser that reports everything it finds to the callbacks in `cbs`.
//Both arguments are optional: a missing `options` falls back to defaultOpts,
//a missing `cbs` to defaultCbs. Also used by reset() to re-initialise an
//existing instance.
function Parser(cbs, options){
this._options = options || defaultOpts;
this._cbs = cbs || defaultCbs;
this._buffer = ""; //input that was written but not consumed yet
this._tagSep = ">"; //separator ("<" or ">") that ended the latest piece
this._stack = []; //names of the currently open tags
this._wroteSpecial = false; //see _writeSpecial
this._contentFlags = 0; //bitmask built from SpecialTags values
this._done = false; //set by end()/done()
this._running = true; //false if paused
}
//Regular expressions used for cleaning up and parsing (stateless)
/* http://dev.w3.org/html5/html-author/#attributes
* - Whitespace is permitted after the tag name, but it is not permitted before the tag name.
* - Attribute names must consist of one or more characters other than the space characters,
* control characters, NULL, one of the characters: double quote ("), single quote ('),
* greater-than sign (>), solidus (/), equals sign (=), nor any characters that are not defined by Unicode.
* - An empty attribute is one where the value has been omitted. (<input disabled>...</input>
* - An unquoted attribute value must not contain any literal space characters, any of the characters:
* double quote ("), apostrophe ('), equals sign (=), less-than sign (<), greater-than sign (>),
* or grave accent (`), and the value must not be the empty string.
* - There may be space characters between the attribute name and the equals sign (=),
* and between that and the attribute value.
* - Double-quoted attributes must not contain any double-quote characters or ambiguous ampersands.
* - Single-quoted attributes must not contain any single-quote characters or ambiguous ampersands.
*/
// element name: (<[^<& ]+)
// attribute name: ( [^"'=>\/]+)
// attribute value: (\s*=\s*(?:
// "([^"]*)"|
// '([^']*)'|
// [^\s"'=<>`]+)
// tag end: (?=\s|\/|$)
var _reAttrib = /\s+([^"'=>\/\s]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+))|(?=\s)|\/|$)/g,
_reTail = /\s|\/|$/;
var defaultOpts = {
xmlMode: false, //Special behavior for script/style tags by default
lowerCaseAttributeNames: false, //call .toLowerCase for each attribute name
lowerCaseTags: false //call .toLowerCase for each tag name
};
var defaultCbs = {
/*
This is just a plain object
so that the parser doesn't
throw if no arguments were
provided.
*/
/*
oncdataend,
oncdatastart,
onclosetag,
oncomment,
oncommentend,
onerror,
onopentag,
onopentagend,
onprocessinginstruction,
onreset,
ontext
*/
};
//Form related tags; shared as the "closes" set of several entries below
var formTags = {
input: true,
option: true,
optgroup: true,
select: true,
button: true,
datalist: true,
textarea: true
};
//Maps a tag name to the set of tags it implicitly closes: when such a tag is
//opened outside of xmlMode, _processOpenTag closes the innermost open
//elements for as long as their names are members of the set.
var openImpliesClose = {
tr : { tr:true, th:true, td:true },
th : { th:true },
td : { thead:true, td:true },
body : { head:true, link:true, script:true },
li : { li:true },
p : { p:true },
select : formTags,
input : formTags,
output : formTags,
button : formTags,
datalist: formTags,
textarea: formTags,
option : { option:true },
optgroup: { optgroup:true }
};
//Parses a complete HTML and pushes it to the handler
//The parser is reset first, so any state left over from earlier input is
//discarded; `data` is then handed to end() as the only chunk.
Parser.prototype.parseComplete = function(data){
this.reset();
this.end(data);
};
//Feeds another piece of the document to the parser (alias: parseChunk).
//The chunk is appended to the internal buffer and, unless the parser is
//paused, parsed right away. Writing after end()/done() is reported through
//_handleError; if that call returns, the chunk is buffered nevertheless.
Parser.prototype.write = function(data){
	if(this._done){
		this._handleError("Attempted to parse chunk after parsing already done");
	}
	this._buffer = this._buffer + data; //FIXME: this can be a bottleneck
	if(!this._running) return;
	this._parseTags();
};
Parser.prototype.parseChunk = Parser.prototype.write;
//Signals that the document is complete (alias: done).
//An optional last `chunk` is written first. Calling it again after the
//parser was marked as done has no effect. While paused, the final
//processing is left to resume().
Parser.prototype.end = function(chunk){
	if(!this._done){
		if(chunk){
			this.write(chunk);
		}
		this._done = true;
		if(this._running){
			this._finishParsing();
		}
	}
};
Parser.prototype.done = Parser.prototype.end;
//Final step of parsing: flushes what is left in the buffer, reports a
//closing tag for every element that is still open (innermost first, and
//only when an onclosetag callback exists) and finally calls onend.
Parser.prototype._finishParsing = function(){
	if(this._buffer){
		//Parse the buffer to its end
		this._parseTags(true);
	}
	if(this._cbs.onclosetag){
		while(this._stack.length > 0){
			var openName = this._stack.pop();
			this._cbs.onclosetag(openName);
		}
	}
	if(this._cbs.onend){
		this._cbs.onend();
	}
};
//Pauses parsing: written data is still buffered, but _parseTags is not
//started (and its loop stops) until resume() is called. Has no effect once
//the parser was marked as done.
Parser.prototype.pause = function(){
if(!this._done) this._running = false;
};
//Continues after pause(): parses what was buffered in the meantime and, if
//end()/done() was called while paused, finishes parsing. Does nothing when
//the parser is already running.
Parser.prototype.resume = function(){
	if(!this._running){
		this._running = true;
		this._parseTags();
		if(this._done){
			this._finishParsing();
		}
	}
};
//Resets the parser to a blank state, ready to parse a new HTML document
//Re-runs the constructor on this instance with the current callbacks and
//options, then notifies the handler through onreset (if present).
Parser.prototype.reset = function(){
Parser.call(this, this._cbs, this._options);
if(this._cbs.onreset) this._cbs.onreset();
};
//Returns the tag name at the start of `data`: everything up to the first
//whitespace character, slash or the end of the string. The name is
//lower-cased when the lowerCaseTags option is set.
Parser.prototype._parseTagName = function(data){
	var nameEnd = data.search(_reTail);
	var tagName = data.substr(0, nameEnd);
	return this._options.lowerCaseTags ? tagName.toLowerCase() : tagName;
};
//Special tags that are treated differently
//Maps an element type to the bit that is set in Parser#_contentFlags while
//the parser is inside such content. Each value is a distinct power of two so
//that the flags can be combined and tested with bitwise operators.
var SpecialTags = {};
//SpecialTags[ElementType.Tag] = 0x0;
SpecialTags[ElementType.Style] = 0x1; //2^0
SpecialTags[ElementType.Script] = 0x2; //2^1
SpecialTags[ElementType.Comment] = 0x4; //2^2
SpecialTags[ElementType.CDATA] = 0x8; //2^3
//Same bits as above, keyed by tag name; used to check whether a closing tag
//ends the special content the parser is currently in
var TagValues = {
style: 1,
script: 2
};
//Core loop of the parser. Walks through the buffer from one "<" or ">" to
//the next and hands every piece between two separators to the matching
//routine (text, opening/closing tag, comment, CDATA, directive). Nothing is
//returned: results are reported through the callbacks, and the consumed part
//is removed from the buffer afterwards.
//`force`: when truthy, `opening` is set to Infinity so that the loop also
//consumes the tail of the buffer behind the last separator.
//The loop stops early when a callback pauses the parser (_running is false).
Parser.prototype._parseTags = function(force){
var current = 0,
opening = this._buffer.indexOf("<"),
closing = this._buffer.indexOf(">"),
next, rawData, elementData, lastTagSep;
//if force is true, parse everything
if(force) opening = Infinity;
//opening !== closing is just false if both are -1
while(opening !== closing && this._running){
//remember which separator started the piece that is handled now
lastTagSep = this._tagSep;
//pick whichever separator comes first and look up its successor
if((opening !== -1 && opening < closing) || closing === -1){
next = opening;
this._tagSep = "<";
opening = this._buffer.indexOf("<", next + 1);
}
else{
next = closing;
this._tagSep = ">";
closing = this._buffer.indexOf(">", next + 1);
}
rawData = this._buffer.substring(current, next); //The next chunk of data to parse
//set elements for next run
current = next + 1;
//the flag values are powers of two, so ">=" tests whether the CDATA bit
//(highest) or at least the comment bit is set
if(this._contentFlags >= SpecialTags[ElementType.CDATA]){
// We're inside a CDATA section
this._writeCDATA(rawData);
}
else if(this._contentFlags >= SpecialTags[ElementType.Comment]){
//We're in a comment tag
this._writeComment(rawData);
}
else if(lastTagSep === "<"){
//the piece followed a "<", so it is the inside of a tag
elementData = rawData.trimLeft();
if(elementData.charAt(0) === "/"){
//elementData = elementData.substr(1).trim();
elementData = this._parseTagName(elementData.substr(1));
if(this._contentFlags !== 0){
//if it's a closing tag, remove the flag
if(this._contentFlags & TagValues[elementData]){
//remove the flag
this._contentFlags ^= TagValues[elementData];
} else {
//a different closing tag inside script/style content is just text
this._writeSpecial(rawData, lastTagSep);
continue;
}
}
this._processCloseTag(elementData);
}
else if(this._contentFlags !== 0) this._writeSpecial(rawData, lastTagSep);
else if(elementData.charAt(0) === "!"){
if(elementData.substr(1, 7) === "[CDATA["){
this._contentFlags |= SpecialTags[ElementType.CDATA];
if(this._cbs.oncdatastart) this._cbs.oncdatastart();
this._writeCDATA(elementData.substr(8));
}
else if(this._contentFlags !== 0) this._writeSpecial(rawData, lastTagSep);
else if(elementData.substr(1, 2) === "--"){
//This tag is a comment
this._contentFlags |= SpecialTags[ElementType.Comment];
this._writeComment(rawData.substr(3));
}
//TODO: This isn't a processing instruction, needs a new name
else if(this._cbs.onprocessinginstruction){
this._cbs.onprocessinginstruction(
"!" + this._parseTagName(elementData.substr(1)),
elementData
);
}
}
else if(elementData.charAt(0) === "?"){
if(this._cbs.onprocessinginstruction){
this._cbs.onprocessinginstruction(
"?" + this._parseTagName(elementData.substr(1)),
elementData
);
}
}
else this._processOpenTag(elementData);
}
else{
//the piece followed a ">", so it is content between two tags
if(this._contentFlags !== 0){
this._writeSpecial(rawData, ">");
}
else if(this._cbs.ontext){
if(this._tagSep === ">") rawData += ">"; //it's the second > in a row
if(rawData !== "") this._cbs.ontext(rawData);
}
}
}
//keep only the part that was not consumed
this._buffer = this._buffer.substr(current);
};
//Handles a piece of input while inside a CDATA section.
//A piece that is followed by ">" and ends with "]]" closes the section: the
//text before "]]" (if any) is reported, the CDATA flag is cleared and
//oncdataend is called. Any other piece is reported as text together with the
//separator that followed it.
Parser.prototype._writeCDATA = function(data){
	var closesSection = this._tagSep === ">" && data.substr(-2) === "]]";
	if(!closesSection){
		if(this._cbs.ontext) this._cbs.ontext(data + this._tagSep);
		return;
	}
	// CDATA ends
	if(data.length !== 2 && this._cbs.ontext){
		this._cbs.ontext(data.slice(0,-2));
	}
	this._contentFlags ^= SpecialTags[ElementType.CDATA];
	if(this._cbs.oncdataend) this._cbs.oncdataend();
	this._wroteSpecial = false;
};
//Handles a piece of input while inside a comment.
//A piece that is followed by ">" and ends with "--" closes the comment: the
//comment flag is cleared, the text before "--" is reported through oncomment
//and oncommentend is called. Any other piece is reported through oncomment
//together with the separator that followed it.
Parser.prototype._writeComment = function(rawData){
	var closesComment = this._tagSep === ">" && rawData.substr(-2) === "--";
	if(!closesComment){
		if(this._cbs.oncomment) this._cbs.oncomment(rawData + this._tagSep);
		return;
	}
	//remove the written flag (also removes the comment flag)
	this._contentFlags ^= SpecialTags[ElementType.Comment];
	this._wroteSpecial = false;
	if(this._cbs.oncomment){
		this._cbs.oncomment(rawData.slice(0, -2));
	}
	if(this._cbs.oncommentend){
		this._cbs.oncommentend();
	}
};
//Reports a piece of raw content (used while a content flag is set) as text.
//The first piece after the flag was set is reported as is (and skipped when
//empty); every later piece is prefixed with the separator that preceded it,
//since that separator belongs to the content.
Parser.prototype._writeSpecial = function(rawData, lastTagSep){
	var text;
	if(this._wroteSpecial){
		//if the previous element is text, append the last tag sep to element
		text = lastTagSep + rawData;
	}
	else{
		//The previous element was not text
		this._wroteSpecial = true;
		if(rawData === "") return;
		text = rawData;
	}
	if(this._cbs.ontext) this._cbs.ontext(text);
};
//Tags that never have content: outside of xmlMode they are closed right
//after being opened and are not put on the stack of open tags.
//The null prototype makes sure that `name in emptyTags` only matches the
//names listed here and no inherited Object.prototype members.
var emptyTags = {
__proto__: null,
area: true,
base: true,
basefont: true,
br: true,
col: true,
frame: true,
hr: true,
img: true,
input: true,
isindex: true,
link: true,
meta: true,
param: true,
embed: true
};
//Handles the closing tag `name`.
//Looks for the innermost open element with that name; if there is one, it
//and everything opened after it is closed (onclosetag is called for each,
//innermost first; without that callback the entries are just dropped from
//the stack). Closing tags without an open counterpart are ignored.
//Outside of xmlMode, closing tags of empty elements are ignored as well,
//except for </br>, which is treated like <br/>.
Parser.prototype._processCloseTag = function(name){
	var usesStack = Boolean(this._stack) && (!(name in emptyTags) || this._options.xmlMode);
	if(!usesStack){
		//many browsers (eg. Safari, Chrome) convert </br> to <br>
		if(name === "br" && !this._options.xmlMode){
			this._processOpenTag(name + "/");
		}
		return;
	}
	var pos = this._stack.lastIndexOf(name);
	if(pos === -1) return;
	if(!this._cbs.onclosetag){
		this._stack.splice(pos);
		return;
	}
	var remaining = this._stack.length - pos;
	while(remaining--){
		this._cbs.onclosetag(this._stack.pop());
	}
};
//Reports every attribute found in the tag source `data` through the
//onattribute callback as (name, value). Names are lower-cased when `lcNames`
//is truthy; attributes without a value are reported with "".
Parser.prototype._parseAttributes = function(data, lcNames){
	var match = _reAttrib.exec(data);
	while(match){
		var attrName = lcNames ? match[1].toLowerCase() : match[1];
		var attrValue = match[2] || match[3] || match[4] || "";
		this._cbs.onattribute(attrName, attrValue);
		match = _reAttrib.exec(data);
	}
};
//Collects the attributes found in the tag source `data` into an object that
//maps attribute name to value. Names are lower-cased when `lcNames` is
//truthy; attributes without a value map to "". When a name occurs more than
//once, the last occurrence wins.
var parseAttributes = function(data, lcNames){
	var attrs = {};
	var match = _reAttrib.exec(data);
	while(match){
		var key = lcNames ? match[1].toLowerCase() : match[1];
		attrs[key] = match[2] || match[3] || match[4] || "";
		match = _reAttrib.exec(data);
	}
	return attrs;
};
//Handles an opening tag. `data` is the source between "<" and ">", i.e. the
//tag name followed by the attributes and an optional trailing slash.
//
//Steps: (1) outside of xmlMode, close open elements that the new tag
//implicitly closes (see openImpliesClose); (2) report the tag through
//onopentagname, onopentag and onattribute; (3) report an immediate closing
//tag for empty and self-closing elements, otherwise push the name on the
//stack and, for script/style, set the matching content flag.
Parser.prototype._processOpenTag = function(data){
	var name = this._parseTagName(data),
		attributes = parseAttributes(data, this._options.lowerCaseAttributeNames),
		type = ElementType.Tag,
		//openImpliesClose and its sets are ordinary objects, so `in` would also
		//match inherited members such as "constructor" or "toString" and close
		//tags with those names by mistake; only own properties may count
		hasOwn = Object.prototype.hasOwnProperty;

	if(this._options.xmlMode){ /*do nothing*/ }
	else if(name === "script") type = ElementType.Script;
	else if(name === "style") type = ElementType.Style;

	if (!this._options.xmlMode && hasOwn.call(openImpliesClose, name)) {
		var closes = openImpliesClose[name],
			el;
		while (hasOwn.call(closes, (el = this._stack[this._stack.length-1]))) {
			this._processCloseTag(el);
		}
	}
	if(this._cbs.onopentagname) this._cbs.onopentagname(name);
	if(this._cbs.onopentag) this._cbs.onopentag(name, attributes);
	if(this._cbs.onattribute){
		this._parseAttributes(data, this._options.lowerCaseAttributeNames);
	}

	//If tag self-terminates, add an explicit, separate closing tag
	/* http://dev.w3.org/html5/html-author/#tags
	 * In XHTML, self-closing tags are valid but attribute values must be quoted.
	 * In HTML, self-closing tags must be either void elements or foreign elements.
	 * Invalid HTML self-closing tag syntax is ignored (treated as an opening tag).
	 * Foreign elements use XML rules
	 */
	//The second half of the check strips the attributes first, so that a slash
	//ending an unquoted attribute value is not taken for a self-closing slash
	if((!this._options.xmlMode && name in emptyTags) || (data.substr(-1) === "/" && data.replace(_reAttrib, "").substr(-1) === "/")){
		if(this._cbs.onclosetag) this._cbs.onclosetag(name);
	} else {
		if(type !== ElementType.Tag){
			this._contentFlags |= SpecialTags[type];
			this._wroteSpecial = false;
		}
		this._stack.push(name);
	}
};
//Wraps the message `error` in an Error and passes it to the onerror
//callback; without such a callback the Error is thrown instead.
Parser.prototype._handleError = function(error){
	var wrapped = new Error(error);
	if(!this._cbs.onerror) throw wrapped;
	this._cbs.onerror(wrapped);
};
module.exports = Parser;

View File

@@ -0,0 +1,19 @@
var ProxyHandler = function(cbs){
if(cbs) this._cbs = cbs;
};
ProxyHandler.prototype._cbs = {};
Object.keys(require("./").EVENTS).forEach(function(name){
name = "on" + name;
Object.defineProperty(ProxyHandler.prototype, name, {
enumerable:true, configurable:true,
get: function(){ return this._cbs[name]; },
set: function(value){
//allow functions to be overwritten
Object.defineProperty(this, name, {value: value});
}
});
});
module.exports = ProxyHandler;

View File

@@ -0,0 +1,35 @@
var WritableStream = require("./WritableStream.js");
var Stream = function(options){
WritableStream.call(this, new cbs(this), options);
};
require("util").inherits(Stream, WritableStream);
Stream.prototype.readable = true;
var cbs = function(scope){
this.scope = scope;
};
var EVENTS = require("../").EVENTS;
Object.keys(EVENTS).forEach(function(name){
if(EVENTS[name] === 0){
cbs.prototype["on" + name] = function(){
this.scope.emit(name);
};
} else if(EVENTS[name] === 1){
cbs.prototype["on" + name] = function(a){
this.scope.emit(name, a);
};
} else if(EVENTS[name] === 2){
cbs.prototype["on" + name] = function(a, b){
this.scope.emit(name, a, b);
};
} else {
throw Error("wrong number of arguments!");
}
});
module.exports = Stream;

View File

@@ -0,0 +1,19 @@
var Parser = require("./Parser.js");
var WritableStream = function(cbs, options){
Parser.call(this, cbs, options);
};
require("util").inherits(WritableStream, require("stream").Stream);
//util.inherits would overwrite the prototype when called twice,
//so we need a different approach
Object.getOwnPropertyNames(Parser.prototype).forEach(function(name){
WritableStream.prototype[name] = Parser.prototype[name];
});
WritableStream.prototype.writable = true;
// TODO improve support for Parser#pause and Parser#continue
module.exports = WritableStream;

View File

@@ -0,0 +1,60 @@
var defineProp = Object.defineProperty;
//Public interface of the module.
//Every component is exposed through a getter that loads it on first access
//and then replaces itself with a plain value property of the same name, so
//that components which are never used are never required.
module.exports = {
get Parser(){
defineProp(this, "Parser", {value:require("./Parser.js")});
return this.Parser;
},
get DomHandler(){
defineProp(this, "DomHandler", {value:require("domhandler")});
return this.DomHandler;
},
get FeedHandler(){
defineProp(this, "FeedHandler", {value:require("./FeedHandler.js")});
return this.FeedHandler;
},
get ElementType(){
defineProp(this, "ElementType", {value:require("domelementtype")});
return this.ElementType;
},
get Stream(){
defineProp(this, "Stream", {value:require("./Stream.js")});
return this.Stream;
},
get WritableStream(){
defineProp(this, "WritableStream", {value:require("./WritableStream.js")});
return this.WritableStream;
},
get ProxyHandler(){
defineProp(this, "ProxyHandler", {value:require("./ProxyHandler.js")});
return this.ProxyHandler;
},
get DomUtils(){
defineProp(this, "DomUtils", {value:require("domutils")});
return this.DomUtils;
},
// For legacy support
//(old names, resolving to DomHandler and FeedHandler respectively)
get DefaultHandler(){
defineProp(this, "DefaultHandler", {value: this.DomHandler});
return this.DefaultHandler;
},
get RssHandler(){
defineProp(this, "RssHandler", {value: this.FeedHandler});
//same value as the RssHandler property that was just defined
return this.FeedHandler;
},
// List of all events that the parser emits
EVENTS: { /* Format: eventname: number of arguments */
attribute: 2,
cdatastart: 0,
cdataend: 0,
text: 1,
processinginstruction: 2,
comment: 1,
commentend: 0,
closetag: 1,
opentag: 2,
opentagname: 1,
error: 1,
end: 0
}
}

View File

@@ -0,0 +1,11 @@
Copyright (c) Felix Böhm
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
THIS IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS,
EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@@ -0,0 +1,14 @@
//Types of elements found in the DOM
//The values are the strings stored in the `type` property of DOM nodes.
module.exports = {
Text: "text", //Text
Directive: "directive", //<? ... ?>
Comment: "comment", //<!-- ... -->
Script: "script", //<script> tags
Style: "style", //<style> tags
Tag: "tag", //Any tag
CDATA: "cdata", //<![CDATA[ ... ]]>
//Returns true if `elem` is an element node, i.e. its type is "tag",
//"script" or "style"
isTag: function(elem){
return elem.type === "tag" || elem.type === "script" || elem.type === "style";
}
};

View File

@@ -0,0 +1,26 @@
{
"name": "domelementtype",
"version": "1.1.1",
"description": "all the types of nodes in htmlparser2's dom",
"main": "index.js",
"repository": {
"type": "git",
"url": "git://github.com/FB55/domelementtype.git"
},
"keywords": [
"dom",
"htmlparser2"
],
"author": {
"name": "Felix Boehm",
"email": "me@feedic.com"
},
"readme": "all the types of nodes in htmlparser2's dom\n",
"readmeFilename": "readme.md",
"_id": "domelementtype@1.1.1",
"dist": {
"shasum": "7887acbda7614bb0a3dbe1b5e394f77a8ed297cf"
},
"_from": "domelementtype@1",
"_resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-1.1.1.tgz"
}

View File

@@ -0,0 +1 @@
all the types of nodes in htmlparser2's dom

View File

@@ -0,0 +1,6 @@
language: node_js
node_js:
- 0.4
- 0.6
- 0.8
- 0.9

View File

@@ -0,0 +1,159 @@
var ElementType = require("domelementtype");
//Handler that builds a DOM tree out of the events of a parser.
//
//Accepted signatures:
//  DomHandler(callback, options, elementCB)
//  DomHandler(callback, elementCB)
//  DomHandler(options, elementCB)
//
//callback(error, dom) is invoked when parsing ended or failed, `options`
//defaults to defaultOpts, and elementCB(element) is invoked for every
//element that gets closed. All three are optional.
function DomHandler(callback, options, elementCB){
	//typeof null is "object" as well: a null callback must not be mistaken
	//for an options object, or the real options would be dropped and used as
	//elementCB instead
	if(callback !== null && typeof callback === "object"){
		elementCB = options;
		options = callback;
		callback = null;
	} else if(typeof options === "function"){
		elementCB = options;
		options = defaultOpts;
	}
	this._callback = callback;
	this._options = options || defaultOpts;
	this._elementCB = elementCB;
	this.dom = [];
	this._done = false;
	this._tagStack = [];
}
//default options
var defaultOpts = {
ignoreWhitespace: false //Keep whitespace-only text nodes
};
//Resets the handler back to starting state
//Re-runs the constructor on this instance with the current callback, options
//and element callback, which empties the DOM and the stack of open elements.
DomHandler.prototype.onreset = function(){
DomHandler.call(this, this._callback, this._options, this._elementCB);
};
//Tells the handler that parsing is complete; the callback is notified
//(without an error) the first time this is called, later calls are ignored.
DomHandler.prototype.onend = function(){
	if(!this._done){
		this._done = true;
		this._handleCallback(null);
	}
};
//Reports the result (alias: _handleCallback). With a callback function set,
//it is called as callback(error, dom); without one, a truthy `error` is
//thrown and anything else is ignored.
DomHandler.prototype.onerror = function(error){
	if(typeof this._callback !== "function"){
		if(error) throw error;
		return;
	}
	this._callback(error, this.dom);
};
DomHandler.prototype._handleCallback = DomHandler.prototype.onerror;
//Closes the innermost open element and passes it to the element callback
//(if one was given). `name` is not checked against the element's name.
DomHandler.prototype.onclosetag = function(name){
//if(this._tagStack.pop().name !== name) this._handleCallback(Error("Tagname didn't match!"));
var elem = this._tagStack.pop();
if(this._elementCB) this._elementCB(elem);
};
//Appends `element` to the children of the innermost open element, or to the
//top level of the DOM when no element is open.
DomHandler.prototype._addDomElement = function(element){
	var parent = this._tagStack[this._tagStack.length - 1];
	var target = parent ? parent.children : this.dom; //There aren't parent elements -> top level
	target.push(element);
};
//Creates the element for an opening tag, adds it to the DOM (below the
//innermost open element, or at the top level) and makes it the innermost
//open element. Within a parent, the new element is linked through prev/next
//to the closest preceding sibling that is an element itself; other nodes,
//such as text, are skipped.
DomHandler.prototype.onopentag = function(name, attribs){
	var parent = this._tagStack[this._tagStack.length - 1];

	var type;
	if(name === "script") type = ElementType.Script;
	else if(name === "style") type = ElementType.Style;
	else type = ElementType.Tag;

	var element = {
		type: type,
		name: name,
		attribs: attribs,
		children: [],
		prev: null,
		next: null,
		parent: parent || null
	};

	if(!parent){
		this.dom.push(element);
	} else {
		var siblings = parent.children;
		for(var i = siblings.length - 1; i >= 0; i--){
			if(ElementType.isTag(siblings[i])){
				element.prev = siblings[i];
				siblings[i].next = element;
				break;
			}
		}
		siblings.push(element);
	}
	this._tagStack.push(element);
};
//Adds text to the DOM. With the ignoreWhitespace option set, text that
//consists of whitespace only is dropped.
//A parser may report one run of text in several pieces; a piece that
//directly follows a text node is therefore appended to that node instead of
//creating a new one. This applies to the top level of the DOM as well as to
//the children of the innermost open element.
DomHandler.prototype.ontext = function(data){
	if(this._options.ignoreWhitespace && data.trim() === "") return;

	var parent = this._tagStack[this._tagStack.length - 1],
		//list the text ends up in: the parent's children, or the DOM root
		siblings = parent ? parent.children : this.dom,
		previous = siblings && siblings[siblings.length - 1];

	if(previous && previous.type === ElementType.Text){
		previous.data += data;
		return;
	}
	this._addDomElement({
		data: data,
		type: ElementType.Text
	});
};
//Adds comment data to the DOM. The first piece of a comment creates a
//comment node, which is also put on the stack of open elements; further
//pieces are appended to that node for as long as it is the innermost entry.
DomHandler.prototype.oncomment = function(data){
	var top = this._tagStack[this._tagStack.length - 1];
	var insideComment = Boolean(top) && top.type === ElementType.Comment;

	if(insideComment){
		top.data += data;
		return;
	}

	var node = {
		data: data,
		type: ElementType.Comment
	};
	this._addDomElement(node);
	this._tagStack.push(node);
};
//TODO remove duplicated code
//Adds CDATA content to the DOM. The first piece creates a CDATA node, which
//is also put on the stack of open elements; further pieces are appended to
//that node for as long as it is the innermost entry.
DomHandler.prototype.oncdata = function(data){
	var top = this._tagStack[this._tagStack.length - 1];
	var insideCdata = Boolean(top) && top.type === ElementType.CDATA;

	if(insideCdata){
		top.data += data;
		return;
	}

	var node = {
		data: data,
		type: ElementType.CDATA
	};
	this._addDomElement(node);
	this._tagStack.push(node);
};
//Ends a comment or CDATA section by removing the innermost entry from the
//stack of open elements (one function serves both events).
//NOTE(review): the pop is unconditional and assumes a matching
//oncomment/oncdata call pushed a node before; if a parser signals the end of
//a section without that, an unrelated open element is removed - confirm
//against the events the parser in use emits.
DomHandler.prototype.oncommentend = DomHandler.prototype.oncdataend = function(){
this._tagStack.pop();
};
//Adds a directive node carrying the given `name` and `data` to the DOM.
//The node is not put on the stack of open elements, so it never gets
//children.
DomHandler.prototype.onprocessinginstruction = function(name, data){
this._addDomElement({
name: name,
data: data,
type: ElementType.Directive
});
};
module.exports = DomHandler;

View File

@@ -0,0 +1,38 @@
{
"name": "domhandler",
"version": "2.0.2",
"description": "htmlparser2's dom as a separate module",
"main": "index.js",
"directories": {
"test": "tests"
},
"scripts": {
"test": "node runtests.js"
},
"repository": {
"type": "git",
"url": "git://github.com/FB55/domhandler.git"
},
"keywords": [
"dom",
"htmlparser2"
],
"dependencies": {
"domelementtype": "1"
},
"devDependencies": {
"htmlparser2": "2.3"
},
"author": {
"name": "Felix Boehm",
"email": "me@feedic.com"
},
"readme": "#DOMHandler [![Build Status](https://secure.travis-ci.org/fb55/DomHandler.png)](http://travis-ci.org/fb55/DomHandler)\n\nThe DOM handler (formally known as DefaultHandler) creates a tree containing all nodes of a page. The tree may be manipulated using the DOMUtils library.\n\n##Usage\n```javascript\nvar handler = new DomHandler([ <func> callback(err, dom), ] [ <obj> options ]);\n// var parser = new Parser(handler[, options]);\n```\n\n##Example\n```javascript\nvar htmlparser = require(\"htmlparser2\");\nvar rawHtml = \"Xyz <script language= javascript>var foo = '<<bar>>';< / script><!--<!-- Waah! -- -->\";\nvar handler = new htmlparser.DomHandler(function (error, dom) {\n if (error)\n \t[...do something for errors...]\n else\n \t[...parsing done, do something...]\n console.log(dom);\n});\nvar parser = new htmlparser.Parser(handler);\nparser.write(rawHtml);\nparser.done();\n```\n\nOutput:\n\n```javascript\n[{\n data: 'Xyz ',\n type: 'text'\n}, {\n type: 'script',\n name: 'script',\n attribs: {\n \tlanguage: 'javascript'\n },\n children: [{\n \tdata: 'var foo = \\'<bar>\\';<',\n \ttype: 'text'\n }]\n}, {\n data: '<!-- Waah! -- ',\n type: 'comment'\n}]\n```\n\n##Option: ignoreWhitespace\nIndicates whether the DOM should exclude text nodes that consists solely of whitespace. The default value is \"false\". \n\nThe following HTML will be used:\n\n```html\n<font>\n\t<br>this is the text\n<font>\n```\n\n###Example: true\n\n```javascript\n[{\n type: 'tag',\n name: 'font',\n children: [{\n \ttype: 'tag',\n \tname: 'br'\n }, {\n \tdata: 'this is the text\\n',\n \ttype: 'text'\n }, {\n \ttype: 'tag',\n \tname: 'font'\n }]\n}]\n```\n\n###Example: false\n\n```javascript\n[{\n\ttype: 'tag',\n name: 'font',\n children: [{\n \tdata: '\\n\\t',\n \ttype: 'text'\n }, {\n \ttype: 'tag',\n \tname: 'br'\n }, {\n \tdata: 'this is the text\\n',\n \ttype: 'text'\n }, {\n \ttype: 'tag',\n \tname: 'font'\n }]\n}]\n```",
"readmeFilename": "readme.md",
"_id": "domhandler@2.0.2",
"dist": {
"shasum": "ca597f42c440173c64d80d838cca738164f1b5ac"
},
"_from": "domhandler@2.0",
"_resolved": "https://registry.npmjs.org/domhandler/-/domhandler-2.0.2.tgz"
}

View File

@@ -0,0 +1,99 @@
#DOMHandler [![Build Status](https://secure.travis-ci.org/fb55/DomHandler.png)](http://travis-ci.org/fb55/DomHandler)
The DOM handler (formerly known as DefaultHandler) creates a tree containing all nodes of a page. The tree may be manipulated using the DOMUtils library.
##Usage
```javascript
var handler = new DomHandler([ <func> callback(err, dom), ] [ <obj> options ]);
// var parser = new Parser(handler[, options]);
```
##Example
```javascript
var htmlparser = require("htmlparser2");
var rawHtml = "Xyz <script language= javascript>var foo = '<<bar>>';< / script><!--<!-- Waah! -- -->";
var handler = new htmlparser.DomHandler(function (error, dom) {
if (error)
[...do something for errors...]
else
[...parsing done, do something...]
console.log(dom);
});
var parser = new htmlparser.Parser(handler);
parser.write(rawHtml);
parser.done();
```
Output:
```javascript
[{
data: 'Xyz ',
type: 'text'
}, {
type: 'script',
name: 'script',
attribs: {
language: 'javascript'
},
children: [{
data: 'var foo = \'<bar>\';<',
type: 'text'
}]
}, {
data: '<!-- Waah! -- ',
type: 'comment'
}]
```
##Option: ignoreWhitespace
Indicates whether the DOM should exclude text nodes that consist solely of whitespace. The default value is "false".
The following HTML will be used:
```html
<font>
<br>this is the text
<font>
```
###Example: true
```javascript
[{
type: 'tag',
name: 'font',
children: [{
type: 'tag',
name: 'br'
}, {
data: 'this is the text\n',
type: 'text'
}, {
type: 'tag',
name: 'font'
}]
}]
```
###Example: false
```javascript
[{
type: 'tag',
name: 'font',
children: [{
data: '\n\t',
type: 'text'
}, {
type: 'tag',
name: 'br'
}, {
data: 'this is the text\n',
type: 'text'
}, {
type: 'tag',
name: 'font'
}]
}]
```

View File

@@ -0,0 +1,51 @@
var fs = require("fs"),
path = require("path"),
assert = require("assert"),
Parser = require("htmlparser2").Parser,
Handler = require("./");
var basePath = path.resolve(__dirname, "tests"),
chunkSize = 5;
//load every .json fixture found in the tests directory
var fixtures = fs
	.readdirSync(basePath)
	.filter(function(name){
		return /\.json$/.test(name); //only allow .json files
	})
	.map(function(name){
		return require(path.resolve(basePath, name));
	});
//run each fixture twice: once fed in small chunks, once in a single call
fixtures.forEach(function(fixture){
	console.log("Testing:", fixture.name);
	var markup = fixture.html;
	//the handler callback checks the produced DOM against the fixture
	var handler = new Handler(function(err, dom){
		assert.ifError(err);
		compare(fixture.expected, dom);
	}, fixture.options.handler);
	var parser = new Parser(handler, fixture.options.parser);
	//first, try to run the test via chunks
	var offset = 0;
	while(offset < markup.length){
		parser.write(markup.substring(offset, offset + chunkSize));
		offset += chunkSize;
	}
	parser.done();
	//then parse everything
	parser.parseComplete(markup);
});
console.log("\nAll tests passed!");
//Recursively asserts that `result` contains everything described by `expected`.
//Primitives (and null) must be strictly equal; for objects and arrays every
//property of `expected` must exist in `result` and match recursively.
//Extra properties on `result` are ignored.
//Throws an AssertionError on the first mismatch.
function compare(expected, result){
	assert.equal(typeof expected, typeof result, "types didn't match");
	if(typeof expected !== "object" || expected === null){
		assert.strictEqual(expected, result, "result doesn't equal expected");
		return;
	}
	//typeof null is "object" too, so a null result passes the type check above;
	//reject it here, otherwise the `in` operator below throws a TypeError
	assert.ok(result !== null, "result was null, expected an object");
	for(var prop in expected){
		assert.ok(prop in result, "result didn't contain property " + prop);
		compare(expected[prop], result[prop]);
	}
}

View File

@@ -0,0 +1,44 @@
{
"name": "Basic test",
"options": {
"handler": {},
"parser": {}
},
"html": "<!DOCTYPE html><html><title>The Title</title><body>Hello world</body></html>",
"expected": [
{
"name": "!DOCTYPE",
"data": "!DOCTYPE html",
"type": "directive"
},
{
"type": "tag",
"name": "html",
"attribs": {},
"children": [
{
"type": "tag",
"name": "title",
"attribs": {},
"children": [
{
"data": "The Title",
"type": "text"
}
]
},
{
"type": "tag",
"name": "body",
"attribs": {},
"children": [
{
"data": "Hello world",
"type": "text"
}
]
}
]
}
]
}

View File

@@ -0,0 +1,24 @@
{
"name": "Single Tag 1",
"options": {
"handler": {},
"parser": {}
},
"html": "<br>text</br>",
"expected": [
{
"type": "tag",
"name": "br",
"attribs": {}
},
{
"data": "text",
"type": "text"
},
{
"type": "tag",
"name": "br",
"attribs": {}
}
]
}

View File

@@ -0,0 +1,24 @@
{
"name": "Single Tag 2",
"options": {
"handler": {},
"parser": {}
},
"html": "<br>text<br>",
"expected": [
{
"type": "tag",
"name": "br",
"attribs": {}
},
{
"data": "text",
"type": "text"
},
{
"type": "tag",
"name": "br",
"attribs": {}
}
]
}

View File

@@ -0,0 +1,30 @@
{
"name": "Unescaped chars in script",
"options": {
"handler": {},
"parser": {}
},
"html": "<head><script language=\"Javascript\">var foo = \"<bar>\"; alert(2 > foo); var baz = 10 << 2; var zip = 10 >> 1; var yap = \"<<>>>><<\";</script></head>",
"expected": [
{
"type": "tag",
"name": "head",
"attribs": {},
"children": [
{
"type": "script",
"name": "script",
"attribs": {
"language": "Javascript"
},
"children": [
{
"data": "var foo = \"<bar>\"; alert(2 > foo); var baz = 10 << 2; var zip = 10 >> 1; var yap = \"<<>>>><<\";",
"type": "text"
}
]
}
]
}
]
}

View File

@@ -0,0 +1,21 @@
{
"name": "Special char in comment",
"options": {
"handler": {},
"parser": {}
},
"html": "<head><!-- commented out tags <title>Test</title>--></head>",
"expected": [
{
"type": "tag",
"name": "head",
"attribs": {},
"children": [
{
"data": " commented out tags <title>Test</title>",
"type": "comment"
}
]
}
]
}

View File

@@ -0,0 +1,21 @@
{
"name": "Script source in comment",
"options": {
"handler": {},
"parser": {}
},
"html": "<script><!--var foo = 1;--></script>",
"expected": [
{
"type": "script",
"name": "script",
"attribs": {},
"children": [
{
"data": "<!--var foo = 1;-->",
"type": "text"
}
]
}
]
}

View File

@@ -0,0 +1,23 @@
{
"name": "Unescaped chars in style",
"options": {
"handler": {},
"parser": {}
},
"html": "<style type=\"text/css\">\n body > p\n\t{ font-weight: bold; }</style>",
"expected": [
{
"type": "style",
"name": "style",
"attribs": {
"type": "text/css"
},
"children": [
{
"data": "\n body > p\n\t{ font-weight: bold; }",
"type": "text"
}
]
}
]
}

View File

@@ -0,0 +1,23 @@
{
"name": "Extra spaces in tag",
"options": {
"handler": {},
"parser": {}
},
"html": "<\n font\t\n size='14' \n>the text<\n /\t\nfont\t \n>",
"expected": [
{
"type": "tag",
"name": "font",
"attribs": {
"size": "14"
},
"children": [
{
"data": "the text",
"type": "text"
}
]
}
]
}

View File

@@ -0,0 +1,23 @@
{
"name": "Unquoted attributes",
"options": {
"handler": {},
"parser": {}
},
"html": "<font size= 14>the text</font>",
"expected": [
{
"type": "tag",
"name": "font",
"attribs": {
"size": "14"
},
"children": [
{
"data": "the text",
"type": "text"
}
]
}
]
}

View File

@@ -0,0 +1,18 @@
{
"name": "Singular attribute",
"options": {
"handler": {},
"parser": {}
},
"html": "<option value='foo' selected>",
"expected": [
{
"type": "tag",
"name": "option",
"attribs": {
"value": "foo",
"selected": ""
}
}
]
}

View File

@@ -0,0 +1,23 @@
{
"name": "Text outside tags",
"options": {
"handler": {},
"parser": {}
},
"html": "Line one\n<br>\nline two",
"expected": [
{
"data": "Line one\n",
"type": "text"
},
{
"type": "tag",
"name": "br",
"attribs": {}
},
{
"data": "\nline two",
"type": "text"
}
]
}

View File

@@ -0,0 +1,14 @@
{
"name": "Only text",
"options": {
"handler": {},
"parser": {}
},
"html": "this is the text",
"expected": [
{
"data": "this is the text",
"type": "text"
}
]
}

View File

@@ -0,0 +1,22 @@
{
"name": "Comment within text",
"options": {
"handler": {},
"parser": {}
},
"html": "this is <!-- the comment --> the text",
"expected": [
{
"data": "this is ",
"type": "text"
},
{
"data": " the comment ",
"type": "comment"
},
{
"data": " the text",
"type": "text"
}
]
}

View File

@@ -0,0 +1,21 @@
{
"name": "Comment within text within script",
"options": {
"handler": {},
"parser": {}
},
"html": "<script>this is <!-- the comment --> the text</script>",
"expected": [
{
"type": "script",
"name": "script",
"attribs": {},
"children": [
{
"data": "this is <!-- the comment --> the text",
"type": "text"
}
]
}
]
}

View File

@@ -0,0 +1,25 @@
{
"name": "Option 'verbose' set to 'false'",
"options": {
"handler": {
"verbose": false
},
"parser": {}
},
"html": "<\n font\t\n size='14' \n>the text<\n /\t\nfont\t \n>",
"expected": [
{
"type": "tag",
"name": "font",
"attribs": {
"size": "14"
},
"children": [
{
"data": "the text",
"type": "text"
}
]
}
]
}

View File

@@ -0,0 +1,46 @@
{
"name": "Options 'ignoreWhitespace' set to 'true'",
"options": {
"handler": {
"ignoreWhitespace": true
},
"parser": {}
},
"html": "Line one\n<br> \t\n<br>\nline two<font>\n <br> x </font>",
"expected": [
{
"data": "Line one\n",
"type": "text"
},
{
"type": "tag",
"name": "br",
"attribs": {}
},
{
"type": "tag",
"name": "br",
"attribs": {}
},
{
"data": "\nline two",
"type": "text"
},
{
"type": "tag",
"name": "font",
"attribs": {},
"children": [
{
"type": "tag",
"name": "br",
"attribs": {}
},
{
"data": " x ",
"type": "text"
}
]
}
]
}

View File

@@ -0,0 +1,21 @@
{
"name": "XML Namespace",
"options": {
"handler": {},
"parser": {}
},
"html": "<ns:tag>text</ns:tag>",
"expected": [
{
"type": "tag",
"name": "ns:tag",
"attribs": {},
"children": [
{
"data": "text",
"type": "text"
}
]
}
]
}

View File

@@ -0,0 +1,19 @@
{
"name": "Enforce empty tags",
"options": {
"handler": {},
"parser": {}
},
"html": "<link>text</link>",
"expected": [
{
"type": "tag",
"name": "link",
"attribs": {}
},
{
"data": "text",
"type": "text"
}
]
}

View File

@@ -0,0 +1,23 @@
{
"name": "Ignore empty tags (xml mode)",
"options": {
"handler": {},
"parser": {
"xmlMode": true
}
},
"html": "<link>text</link>",
"expected": [
{
"type": "tag",
"name": "link",
"attribs": {},
"children": [
{
"data": "text",
"type": "text"
}
]
}
]
}

View File

@@ -0,0 +1,23 @@
{
"name": "Template script tags",
"options": {
"handler": {},
"parser": {}
},
"html": "<script type=\"text/template\"><h1>Heading1</h1></script>",
"expected": [
{
"type": "script",
"name": "script",
"attribs": {
"type": "text/template"
},
"children": [
{
"data": "<h1>Heading1</h1>",
"type": "text"
}
]
}
]
}

View File

@@ -0,0 +1,18 @@
{
"name": "Conditional comments",
"options": {
"handler": {},
"parser": {}
},
"html": "<!--[if lt IE 7]> <html class='no-js ie6 oldie' lang='en'> <![endif]--><!--[if lt IE 7]> <html class='no-js ie6 oldie' lang='en'> <![endif]-->",
"expected": [
{
"data": "[if lt IE 7]> <html class='no-js ie6 oldie' lang='en'> <![endif]",
"type": "comment"
},
{
"data": "[if lt IE 7]> <html class='no-js ie6 oldie' lang='en'> <![endif]",
"type": "comment"
}
]
}

View File

@@ -0,0 +1,46 @@
{
"name": "Basic test",
"options": {
"handler": {},
"parser": {
"lowerCaseTags": true
}
},
"html": "<!DOCTYPE html><HTML><TITLE>The Title</title><BODY>Hello world</body></html>",
"expected": [
{
"name": "!doctype",
"data": "!DOCTYPE html",
"type": "directive"
},
{
"type": "tag",
"name": "html",
"attribs": {},
"children": [
{
"type": "tag",
"name": "title",
"attribs": {},
"children": [
{
"data": "The Title",
"type": "text"
}
]
},
{
"type": "tag",
"name": "body",
"attribs": {},
"children": [
{
"data": "Hello world",
"type": "text"
}
]
}
]
}
]
}

View File

@@ -0,0 +1,224 @@
var ElementType = require("domelementtype"),
DomUtils = module.exports;
//Collects up to `limit` nodes from `arr` for which `test` returns a truthy value.
//Each node is checked before its children; when `recurse` is truthy the
//`children` arrays are searched as well. Returns the matches as an array.
function find(test, arr, recurse, limit){
	var matches = [],
		total = arr.length;
	for(var idx = 0; idx < total; idx++){
		var node = arr[idx];
		if(test(node)){
			matches.push(node);
			limit -= 1;
			if(limit <= 0) break;
		}
		var kids = node.children;
		if(recurse && kids && kids.length > 0){
			//descend with whatever is left of the limit
			var nested = find(test, kids, recurse, limit);
			matches = matches.concat(nested);
			limit -= nested.length;
			if(limit <= 0) break;
		}
	}
	return matches;
}
//Returns the first node of `arr` for which `test` returns a truthy value,
//or null if there is none. A node is checked before its children; the
//children are only searched when `recurse` is truthy.
function findOne(test, arr, recurse){
	var total = arr.length;
	for(var pos = 0; pos < total; pos++){
		var candidate = arr[pos];
		if(test(candidate)) return candidate;
		if(!recurse) continue;
		var kids = candidate.children;
		if(kids && kids.length > 0){
			var hit = findOne(test, kids, true);
			if(hit) return hit;
		}
	}
	return null;
}
//Returns every node in `arr` and in all nested `children` arrays for which
//`test` returns a truthy value. Matches of the current level come first,
//followed by the matches found below each node (in node order).
function findAll(test, arr){
	var collected = arr.filter(test);
	arr.forEach(function(elem){
		var kids = elem.children;
		if(kids && kids.length > 0){
			collected = collected.concat(findAll(test, kids));
		}
	});
	return collected;
}
//True when the node's type is one of the element types: Tag, Script or Style.
var isTag = DomUtils.isTag = function(elem){
	var kind = elem.type;
	return kind === ElementType.Tag || kind === ElementType.Script || kind === ElementType.Style;
};
//Returns the nodes of `element` (a node or an array of nodes) accepted by `test`.
//`recurse` defaults to true: only an explicit `false` restricts the search to
//the given nodes. `limit` caps the number of results; anything that is not a
//finite number means "no limit".
function filter(test, element, recurse, limit){
	var roots = Array.isArray(element) ? element : [element],
		deep = recurse !== false,
		unlimited = typeof limit !== "number" || limit === Infinity;
	if(unlimited){
		return deep ? findAll(test, roots) : roots.filter(test);
	}
	if(limit === 1){
		//a single result only needs the first match
		var first = findOne(test, roots, deep);
		return first ? [first] : [];
	}
	return find(test, roots, deep, limit);
}
DomUtils.filter = filter;
//Checks a single node against every own property of `options`; each value
//is a predicate function. The special keys are
//  tag_name     - node must be a tag and the predicate must accept its name
//  tag_type     - the predicate must accept the node's type
//  tag_contains - node must not be a tag and the predicate must accept its data
//Any other key is treated as an attribute name. Returns true only if all
//checks pass.
DomUtils.testElement = function(options, element){
	for(var key in options){
		if(!options.hasOwnProperty(key)) continue;
		switch(key){
			case "tag_name":
				if(!isTag(element) || !options.tag_name(element.name)) return false;
				break;
			case "tag_type":
				if(!options.tag_type(element.type)) return false;
				break;
			case "tag_contains":
				if(isTag(element) || !options.tag_contains(element.data)) return false;
				break;
			default:
				//plain attribute check
				if(!element.attribs || !options[key](element.attribs[key])) return false;
		}
	}
	return true;
};
//Factories for the special (non-attribute) checks. Each factory takes
//either a literal value or a predicate function and returns a function that
//tests a single node.
var Checks = {
	//matches tags by name; "*" matches every tag
	tag_name: function(name){
		if(typeof name === "function"){
			return function(elem){ return isTag(elem) && name(elem.name); };
		} else if(name === "*"){
			return isTag;
		} else {
			return function(elem){ return isTag(elem) && elem.name === name; };
		}
	},
	//matches nodes by their type
	tag_type: function(type){
		if(typeof type === "function"){
			return function(elem){ return type(elem.type); };
		} else {
			return function(elem){ return elem.type === type; };
		}
	},
	//matches non-tag nodes (text, comments, ...) by their data
	tag_contains: function(data){
		//the parameter is `data`; testing the undeclared name `type` here meant
		//that a predicate function was never called
		if(typeof data === "function"){
			return function(elem){ return !isTag(elem) && data(elem.data); };
		} else {
			return function(elem){ return !isTag(elem) && elem.data === data; };
		}
	}
};
//Builds a node test for the attribute `attrib`. `value` is either a literal
//that the attribute must equal strictly or a predicate that receives the
//attribute value. For nodes without `attribs` the test returns that falsy
//`attribs` value.
function getAttribCheck(attrib, value){
	var matches = typeof value === "function"
		? value
		: function(actual){ return actual === value; };
	return function(elem){
		return elem.attribs && matches(elem.attribs[attrib]);
	};
}
//Returns the nodes of `element` that satisfy at least one of the checks
//described by `options`. The keys tag_name, tag_type and tag_contains use the
//special checks; every other key is an attribute name. Values are literals
//or predicate functions. `recurse` and `limit` are passed on to filter().
//An empty `options` object yields an empty array.
DomUtils.getElements = function(options, element, recurse, limit){
	var hasOwn = Object.prototype.hasOwnProperty,
		funcs = [];
	for(var key in options){
		if(!hasOwn.call(options, key)) continue;
		//only own keys of Checks are special; a plain `in` test would also
		//match inherited names such as "valueOf" or "constructor" and hand
		//the attribute to an Object.prototype method
		if(hasOwn.call(Checks, key)) funcs.push(Checks[key](options[key]));
		else funcs.push(getAttribCheck(key, options[key]));
	}
	if(funcs.length === 0) return [];
	if(funcs.length === 1) return filter(funcs[0], element, recurse, limit);
	return filter(
		function(elem){
			return funcs.some(function(func){ return func(elem); });
		},
		element, recurse, limit
	);
};
//Returns the first node whose "id" attribute matches `id` (a literal or a
//predicate), or null. `element` may be a node or an array of nodes; children
//are searched unless `recurse` is exactly false.
DomUtils.getElementById = function(id, element, recurse){
	var roots = Array.isArray(element) ? element : [element],
		hasId = getAttribCheck("id", id);
	return findOne(hasId, roots, recurse !== false);
};
//Returns the tags in `element` whose name matches `name` (a literal, "*" or
//a predicate). `recurse` and `limit` are passed on to filter().
DomUtils.getElementsByTagName = function(name, element, recurse, limit){
	var matchesName = Checks.tag_name(name);
	return filter(matchesName, element, recurse, limit);
};
//Returns the nodes in `element` whose type matches `type` (a literal or a
//predicate). `recurse` and `limit` are passed on to filter().
DomUtils.getElementsByTagType = function(type, element, recurse, limit){
	var matchesType = Checks.tag_type(type);
	return filter(matchesType, element, recurse, limit);
};
//Detaches `elem` from the tree: its neighbours are linked to each other and
//the node is removed from its parent's `children` array. The `prev`, `next`
//and `parent` references on `elem` itself are left untouched.
DomUtils.removeElement = function(elem){
	if(elem.prev) elem.prev.next = elem.next;
	if(elem.next) elem.next.prev = elem.prev;
	if(elem.parent){
		var siblings = elem.parent.children,
			index = siblings.lastIndexOf(elem);
		//splice(-1, 1) would remove the parent's last child, so only splice
		//when the node really is among the children
		if(index !== -1) siblings.splice(index, 1);
	}
};
//Serializes the children of `elem` by concatenating their outer HTML.
//Nodes without a `children` property yield an empty string.
DomUtils.getInnerHTML = function(elem){
	var kids = elem.children;
	if(!kids) return "";
	var html = "",
		total = kids.length,
		idx = 0;
	while(idx < total){
		html += DomUtils.getOuterHTML(kids[idx]);
		idx++;
	}
	return html;
};
//boolean attributes without a value (taken from MatthewMueller/cheerio)
//The lookup table has no prototype, so `name in booleanAttribs` only ever
//matches the names listed here.
var booleanAttribs = Object.create(null);
[
	"async",
	"autofocus",
	"autoplay",
	"checked",
	"controls",
	"defer",
	"disabled",
	"hidden",
	"loop",
	"multiple",
	"open",
	"readonly",
	"required",
	"scoped",
	"selected",
	"/" //TODO when is this required?
].forEach(function(name){
	booleanAttribs[name] = true;
});
//Serializes a node, including its children, to markup.
//Text is returned as is; comments, directives and CDATA sections are wrapped
//in their delimiters. Everything else is written as
//<name attributes>inner html</name>. Attribute values are not escaped.
DomUtils.getOuterHTML = function(elem){
	var type = elem.type;
	if(type === ElementType.Text) return elem.data;
	if(type === ElementType.Comment) return "<!--" + elem.data + "-->";
	if(type === ElementType.Directive) return "<" + elem.data + ">";
	//a CDATA section opens with "<![CDATA[" (the old "<!CDATA " is not valid markup)
	if(type === ElementType.CDATA) return "<![CDATA[" + elem.data + "]]>";
	var ret = "<" + elem.name;
	if("attribs" in elem){
		var hasOwn = Object.prototype.hasOwnProperty;
		for(var attr in elem.attribs){
			//borrowed hasOwnProperty keeps working even if the document
			//contains an attribute with that very name
			if(hasOwn.call(elem.attribs, attr)){
				ret += " " + attr;
				var value = elem.attribs[attr];
				if(!value){
					//empty boolean attributes are written without a value
					if( !(attr in booleanAttribs) ){
						ret += '=""';
					}
				} else {
					ret += '="' + value + '"';
				}
			}
		}
	}
	return ret + ">" + DomUtils.getInnerHTML(elem) + "</" + elem.name + ">";
};

View File

@@ -0,0 +1,39 @@
{
"name": "domutils",
"version": "1.0.1",
"description": "utilities for working with htmlparser2's dom",
"main": "index.js",
"directories": {
"test": "tests"
},
"scripts": {
"test": "node tests/00-runtests.js"
},
"repository": {
"type": "git",
"url": "git://github.com/FB55/domutils.git"
},
"keywords": [
"dom",
"htmlparser2"
],
"dependencies": {
"domelementtype": "1"
},
"devDependencies": {
"htmlparser2": "2.3",
"domhandler": "2"
},
"author": {
"name": "Felix Boehm",
"email": "me@feedic.com"
},
"readme": "utilities for working with htmlparser2's dom\n",
"readmeFilename": "readme.md",
"_id": "domutils@1.0.1",
"dist": {
"shasum": "58b58d774774911556c16b8b02d99c609d987869"
},
"_from": "domutils@1.0",
"_resolved": "https://registry.npmjs.org/domutils/-/domutils-1.0.1.tgz"
}

View File

@@ -0,0 +1 @@
utilities for working with htmlparser2's dom

View File

@@ -0,0 +1,64 @@
var fs = require("fs"),
assert = require("assert");
var runCount = 0,
testCount = 0;
//Recursively checks that `result` contains everything described by `expected`.
//Primitives (and null) must be strictly equal; for objects and arrays every
//property of `expected` must exist in `result` and match recursively.
//Extra properties on `result` are ignored.
//Throws an Error describing the first mismatch.
function compare(expected, result){
	if(typeof expected !== typeof result){
		throw new Error("types didn't match");
	}
	if(typeof expected !== "object" || expected === null){
		if(expected !== result){
			throw new Error("result doesn't equal expected");
		}
		return;
	}
	//typeof null is "object" as well, so a null result gets past the type check;
	//without this guard the `in` operator below throws a TypeError
	if(result === null){
		throw new Error("result was null, expected an object");
	}
	for(var prop in expected){
		if(!(prop in result)) throw new Error("result didn't contain property " + prop);
		compare(expected[prop], result[prop]);
	}
}
//Loads every fixture found in the suite's directory (`test.dir`, relative to
//this file) and hands each one to `test.test`. The suite calls back twice per
//fixture; both results are compared with the fixture's `expected` value and
//the second call marks the fixture as finished.
function runTests(test){
	var dir = __dirname + test.dir;
	//read files, load them, run them
	var fixtures = fs.readdirSync(dir).map(function(name){
		if(name[0] === ".") return false; //hidden files are skipped below
		var fullPath = dir + name;
		if(name.substr(-5) === ".json"){
			return JSON.parse(fs.readFileSync(fullPath));
		}
		return require(fullPath);
	});
	fixtures.forEach(function(file){
		if(!file) return;
		var seenFirst = false;
		runCount++;
		console.log("Testing:", file.name);
		test.test(file, function(err, dom){
			assert.ifError(err);
			compare(file.expected, dom);
			if(!seenFirst){
				seenFirst = true;
				return;
			}
			runCount--;
			testCount++;
		});
	});
	console.log("->", test.dir.slice(1, -1), "started");
}
//run all tests
var suites = ["./02-dom_utils.js"];
suites
	.map(function(file){ return require(file); })
	.forEach(function(suite){ runTests(suite); });
//log the results once no test is running any more
function check(){
	if(runCount === 0){
		console.log("Total tests:", testCount);
		return;
	}
	process.nextTick(check);
}
check();

View File

@@ -0,0 +1,15 @@
//generate a dom
var handler = new (require("domhandler"))();
(new (require("htmlparser2").Parser)(handler)).parseComplete(
Array(21).join("<?xml><tag1 id='asdf'> <script>text</script> <!-- comment --> <tag2> text </tag1>")
);
var dom = handler.dom;
exports.dir = "/DomUtils/";
exports.test = function(test, cb){
cb(null, test.getElements(dom));
cb(null, test.getByFunction(dom));
};

View File

@@ -0,0 +1,56 @@
var DomUtils = require("../..");
exports.name = "Get element by id";
exports.getElements = function(dom){
return DomUtils.getElements({id:"asdf"}, dom, true, 1)[0];
};
exports.getByFunction = function(dom){
return DomUtils.getElementById("asdf", dom, true);
};
exports.expected = {
"type": "tag",
"name": "tag1",
"attribs": {
"id": "asdf"
},
"children": [
{
"data": " ",
"type": "text"
},
{
"type": "script",
"name": "script",
"attribs": {},
"children": [
{
"data": "text",
"type": "text"
}
]
},
{
"data": " ",
"type": "text"
},
{
"data": " comment ",
"type": "comment"
},
{
"data": " ",
"type": "text"
},
{
"type": "tag",
"name": "tag2",
"attribs": {},
"children": [
{
"data": " text ",
"type": "text"
}
]
}
]
};

View File

@@ -0,0 +1,23 @@
var DomUtils = require("../..");
exports.name = "Get elements by tagName";
exports.getElements = function(dom){
return DomUtils.getElements({tag_name:"tag2"}, dom, true);
};
exports.getByFunction = function(dom){
return DomUtils.getElementsByTagName("tag2", dom, true);
};
exports.expected = [];
for(var i = 0; i < 20; i++) exports.expected.push(
{
"type": "tag",
"name": "tag2",
"attribs": {},
"children": [
{
"data": " text ",
"type": "text"
}
]
}
);

View File

@@ -0,0 +1,23 @@
var DomUtils = require("../..");
exports.name = "Get elements by type";
exports.getElements = function(dom){
return DomUtils.getElements({tag_type:"script"}, dom, true);
};
exports.getByFunction = function(dom){
return DomUtils.getElementsByTagType("script", dom, true);
};
exports.expected = [];
for(var i = 0; i < 20; i++) exports.expected.push(
{
"type": "script",
"name": "script",
"attribs": {},
"children": [
{
"data": "text",
"type": "text"
}
]
}
);

View File

@@ -0,0 +1,10 @@
var DomUtils = require("../..");
exports.name = "Get outer HTML";
exports.getElements = function(dom){
return '<tag1 id="asdf"> <script>text</script> <!-- comment --> <tag2> text </tag2></tag1>';
};
exports.getByFunction = function(dom){
return DomUtils.getOuterHTML(DomUtils.getElementById("asdf", dom, true));
};
exports.expected = '<tag1 id="asdf"> <script>text</script> <!-- comment --> <tag2> text </tag2></tag1>';

View File

@@ -0,0 +1,10 @@
var DomUtils = require("../..");
exports.name = "Get inner HTML";
exports.getElements = function(dom){
return ' <script>text</script> <!-- comment --> <tag2> text </tag2>';
};
exports.getByFunction = function(dom){
return DomUtils.getInnerHTML(DomUtils.getElementById("asdf", dom, true));
};
exports.expected = ' <script>text</script> <!-- comment --> <tag2> text </tag2>';

View File

@@ -0,0 +1,59 @@
{
"name": "htmlparser2",
"description": "Performance-optimized forgiving HTML/XML/RSS parser",
"version": "2.6.0",
"author": {
"name": "Felix Boehm",
"email": "me@feedic.com"
},
"keywords": [
"html",
"parser",
"streams",
"xml",
"dom",
"rss",
"feed",
"atom"
],
"contributors": [
{
"name": "Chris Winberry",
"email": "chris@winberry.net"
}
],
"repository": {
"type": "git",
"url": "git://github.com/fb55/node-htmlparser.git"
},
"bugs": {
"mail": "me@feedic.com",
"url": "http://github.com/fb55/node-htmlparser/issues"
},
"directories": {
"lib": "lib/"
},
"main": "./lib/",
"scripts": {
"test": "node tests/00-runtests.js"
},
"dependencies": {
"domhandler": "2.0",
"domutils": "1.0",
"domelementtype": "1"
},
"licenses": [
{
"type": "MIT",
"url": "http://github.com/tautologistics/node-htmlparser/raw/master/LICENSE"
}
],
"readme": "#htmlparser2 [![Build Status](https://secure.travis-ci.org/fb55/node-htmlparser.png)](http://travis-ci.org/fb55/node-htmlparser)\n\nA forgiving HTML/XML/RSS parser written in JS for NodeJS. The parser can handle streams (chunked data) and supports custom handlers for writing custom DOMs/output.\n\n##Installing\n\tnpm install htmlparser2\n\n##Usage\n\n```javascript\nvar htmlparser = require(\"htmlparser2\");\nvar parser = new htmlparser.Parser({\n\tonopentag: function(name, attribs){\n\t\tif(name === \"script\" && attribs.type === \"text/javascript\"){\n\t\t\tconsole.log(\"JS! Hooray!\");\n\t\t}\n\t},\n\tontext: function(text){\n\t\tconsole.log(\"-->\", text);\n\t},\n\tonclosetag: function(tagname){\n\t\tif(tagname === \"script\"){\n\t\t\tconsole.log(\"That's it?!\");\n\t\t}\n\t}\n});\nparser.write(\"Xyz <script language= javascript>var foo = '<<bar>>';< / script>\");\nparser.done();\n```\n\nOutput (simplified):\n\n```javascript\n--> Xyz \nJS! Hooray!\n--> var foo = '<<bar>>';\nThat's it?!\n```\n\nRead more about the parser in the [wiki](https://github.com/FB55/node-htmlparser/wiki/Parser-options).\n\n##Get a DOM\nThe `DomHandler` (known as `DefaultHandler` in the original `htmlparser` module) produces a DOM (document object model) that can be manipulated using the `DomUtils` helper.\n\nThe `DomHandler`, while still bundled with this module, was recently moved to it's [own module](https://github.com/FB55/domhandler). 
Have a look at it for further information.\n\n##Parsing RSS/RDF/Atom Feeds\n\n```javascript\nnew htmlparser.FeedHandler(function(<error> error, <object> feed){\n ...\n});\n```\n\n##Performance\nUsing a slightly modified version of [node-expat](https://github.com/astro/node-expat)s `bench.js`, I received the following results (on a MacBook (late 2010)):\n\n* [htmlparser](https://github.com/tautologistics/node-htmlparser): 51779 el/s\n* [sax.js](https://github.com/isaacs/sax-js): 53169 el/s\n* [node-expat](https://github.com/astro/node-expat): 103388 el/s\n* [htmlparser2](https://github.com/fb55/node-htmlparser): 118614 el/s\n\nThe test may be found in `tests/bench.js`.\n\n##How is this different from [node-htmlparser](https://github.com/tautologistics/node-htmlparser)?\nThis is a fork of the project above. The main difference is that this is intended to be used only with node (it runs on other platforms using [browserify](https://github.com/substack/node-browserify)). Besides, the code is much better structured, has less duplications and is remarkably faster than the original. \n\nThe parser now provides a callback interface close to [sax.js](https://github.com/isaacs/sax-js) (originally targeted at [readabilitySAX](https://github.com/fb55/readabilitysax)). I also fixed a couple of bugs & included some pull requests for the original project (eg. [RDF feed support](https://github.com/tautologistics/node-htmlparser/pull/35)).\n\nThe support for location data and verbose output was removed a couple of versions ago. It's still available in the [verbose branch](https://github.com/FB55/node-htmlparser/tree/verbose). \n\nThe `DefaultHandler` and the `RssHandler` were renamed to clarify their purpose (to `DomHandler` and `FeedHandler`). The old names are still available when requiring `htmlparser2`, so your code should work as expected.\n",
"readmeFilename": "README.md",
"_id": "htmlparser2@2.6.0",
"dist": {
"shasum": "b28564ea9d1ba56a104ace6a7b0fdda2f315836f"
},
"_from": "htmlparser2@2.x",
"_resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-2.6.0.tgz"
}

View File

@@ -0,0 +1,49 @@
var fs = require("fs"),
path = require("path"),
assert = require("assert");
//state shared by all test callbacks
var runCount = 0, //tests that have not reported their second run yet
	testCount = 0, //tests that completed both runs
	done = false; //set once every test has been started
//load each suite and run it against all fixtures in its directory
[
	"./01-events.js",
	"./02-stream.js",
	"./03-feed.js"
]
.map(require)
.forEach(function (test){
	console.log("\nStarting", test.dir, "\n----");
	var dir = path.resolve(__dirname, test.dir);
	//read files, load them, run them
	fs
	.readdirSync(dir)
	.filter(RegExp.prototype.test, /^[^\._]/) //ignore all files with a leading dot or underscore
	.map(function(name){
		return path.resolve(dir, name);
	})
	.map(require)
	.forEach(function(file){
		runCount++;
		console.log("Testing:", file.name);
		var second = false; //every test runs twice
		test.test(file, function(err, dom){
			assert.ifError(err);
			assert.deepEqual(file.expected, dom, "didn't get expected output");
			if(second){
				testCount++;
				//the last test to finish prints the summary
				if(!--runCount && done){
					console.log("Total tests:", testCount);
				}
			}
			else second = true;
		});
	});
});
done = true; //started all tests
//if every test already finished synchronously, no callback is left to
//print the summary, so do it here
if(runCount === 0){
	console.log("Total tests:", testCount);
}

View File

@@ -0,0 +1,41 @@
var helper = require("./test-helper.js"),
sliceArr = Array.prototype.slice;
exports.dir = "Events";
exports.test = function(test, cb){
var tokens = [], cbs;
if(typeof Proxy !== "undefined"){
cbs = Proxy.create({ get: function(a, name){
if(name === "onend"){
return function(){
cb(null, tokens.splice(0));
}
}
if(name === "onreset") return function(){};
return function(){
tokens.push({
event: name.substr(2),
data: sliceArr.apply(arguments)
});
}
}});
}
else{
cbs = {
onerror: cb,
onend: function(){
cb(null, tokens.splice(0));
}
};
helper.EVENTS.forEach(function(name){
cbs["on" + name] = function(){
tokens.push({
event: name,
data: sliceArr.apply(arguments)
});
}
});
}
helper.writeToParser(cbs, test.options.parser, test.html);
};

View File

@@ -0,0 +1,53 @@
var helper = require("./test-helper.js"),
Stream = require("..").Stream,
sliceArr = Array.prototype.slice,
fs = require("fs");
exports.dir = "Stream";
exports.test = function(test, cb){
var tokens = [],
stream = new Stream(test.options),
second = false;
if(typeof Proxy !== "undefined"){
stream._events = Proxy.create({ get: function(a, name){
if(name === "end"){
return function(){
cb(null, tokens.splice(0));
if(!second){
second = true;
stream.parseComplete(fs.readFileSync(__dirname + test.file).toString());
}
};
}
if(helper.EVENTS.indexOf(name) !== -1) return function(){
tokens.push({
event: name,
data: sliceArr.apply(arguments)
});
};
}});
}
else {
stream._events = {
error: cb,
end: function(){
cb(null, tokens.splice(0));
if(!second){
second = true;
stream.parseComplete(fs.readFileSync(__dirname + test.file).toString());
}
}
};
helper.EVENTS.forEach(function(name){
stream.on(name, function(){
tokens.push({
event: name,
data: sliceArr.apply(arguments)
});
});
});
}
fs.createReadStream(__dirname + test.file).pipe(stream);
};

View File

@@ -0,0 +1,19 @@
//Runs tests for feeds
var helper = require("./test-helper.js"),
FeedHandler = require("../lib/FeedHandler.js"),
fs = require("fs"),
parserOpts = {
xmlMode: true
};
exports.dir = "Feeds";
exports.test = function(test, cb){
var handler = new FeedHandler(function(err, dom){
if(err) cb(err, 0); //return the error
else cb(null, dom);
});
var file = fs.readFileSync(__dirname + "/Documents/" + test.file).toString();
helper.writeToParser(handler, parserOpts, file);
};

View File

@@ -0,0 +1,26 @@
//repeats a snippet 5000 times
function multiply(text){
	return Array(5e3+1).join(text);
}
//benchmark inputs, one per kind of markup
var tests = {
	self_closing: multiply("<br/>"),
	tag: multiply("<tag foo=bar foobar> Text </tag>"),
	comment: multiply("<!-- this is <<a> comment -->"),
	directive: multiply("<?foo bar?>"),
	special: multiply("<script> THIS IS <SPECIAL> </script>"),
	xml: multiply("<!directive><tag attr='value'> text <!--Comment<>--></tag>")
};
//every parser event gets the same no-op handler
function empty(){}
var cbs = {};
var eventNames = require("./test-helper.js").EVENTS;
eventNames.forEach(function(name){
	cbs["on" + name] = empty;
});
var Parser = require("../lib/Parser.js");
var parser = new Parser(cbs);
var ben = require("ben");
//time each input with ben and print the result
Object.keys(tests).forEach(function(name){
	var input = tests[name];
	var duration = ben(150, function(){
		parser.parseComplete(input);
	});
	console.log("Test", name, "took", duration);
});

View File

@@ -0,0 +1,25 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- http://en.wikipedia.org/wiki/Atom_%28standard%29 -->
<feed xmlns="http://www.w3.org/2005/Atom">
<title>Example Feed</title>
<subtitle>A subtitle.</subtitle>
<link href="http://example.org/feed/" rel="self" />
<link href="http://example.org/" />
<id>urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6</id>
<updated>2003-12-13T18:30:02Z</updated>
<author>
<name>John Doe</name>
<email>johndoe@example.com</email>
</author>
<entry>
<title>Atom-Powered Robots Run Amok</title>
<link href="http://example.org/2003/12/13/atom03" />
<link rel="alternate" type="text/html" href="http://example.org/2003/12/13/atom03.html"/>
<link rel="edit" href="http://example.org/2003/12/13/atom03/edit"/>
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
<updated>2003-12-13T18:30:02Z</updated>
<summary>Some text.</summary>
</entry>
</feed>

View File

@@ -0,0 +1 @@
<!DOCTYPE html><html><title>The Title</title><body>Hello world</body></html>

View File

@@ -0,0 +1,63 @@
<?xml version="1.0" encoding="UTF-8"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:ev="http://purl.org/rss/1.0/modules/event/" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:admin="http://webns.net/mvcb/">
<channel rdf:about="http://sfbay.craigslist.org/ccc/">
<title>craigslist | all community in SF bay area</title>
<link>http://sfbay.craigslist.org/ccc/</link>
<description/>
<dc:language>en-us</dc:language>
<dc:rights>Copyright 2011 craigslist, inc.</dc:rights>
<dc:publisher>webmaster@craigslist.org</dc:publisher>
<dc:creator>webmaster@craigslist.org</dc:creator>
<dc:source>http://sfbay.craigslist.org/ccc//</dc:source>
<dc:title>craigslist | all community in SF bay area</dc:title>
<dc:type>Collection</dc:type>
<syn:updateBase>2011-11-04T09:39:10-07:00</syn:updateBase>
<syn:updateFrequency>4</syn:updateFrequency>
<syn:updatePeriod>hourly</syn:updatePeriod>
<items>
<rdf:Seq>
<rdf:li rdf:resource="http://sfbay.craigslist.org/sby/muc/2681301534.html"/>
</rdf:Seq>
</items>
</channel>
<item rdf:about="http://sfbay.craigslist.org/sby/muc/2681301534.html">
<title><![CDATA[ Music Equipment Repair and Consignment ]]></title>
<link>
http://sfbay.craigslist.org/sby/muc/2681301534.html
</link>
<description><![CDATA[
San Jose Rock Shop offers musical instrument repair and consignment! (408) 215-2065<br> <br> We are pleased to announce our NEW LOCATION: 1199 N 5th st. San Jose, ca 95112. Please call ahead, by appointment only.<br> <br> Recently featured by Metro Newspaper in their 2011 Best of the Silicon Valley edition see it online here:<br> <a href="http://www.metroactive.com/best-of-silicon-valley/2011/music-nightlife/editor-picks.html" rel="nofollow">http://www.metroactive.com/best-of-silicon-valley/2011/music-nightlife/editor-picks.html</a><br> <br> Guitar Set up (acoustic and electronic) $40!<!-- END CLTAGS -->
]]></description>
<dc:date>2011-11-04T09:35:17-07:00</dc:date>
<dc:language>en-us</dc:language>
<dc:rights>Copyright 2011 craigslist, inc.</dc:rights>
<dc:source>
http://sfbay.craigslist.org/sby/muc/2681301534.html
</dc:source>
<dc:title><![CDATA[ Music Equipment Repair and Consignment ]]></dc:title>
<dc:type>text</dc:type>
<dcterms:issued>2011-11-04T09:35:17-07:00</dcterms:issued>
</item>
<item rdf:about="http://sfbay.craigslist.org/eby/rid/2685010755.html">
<title><![CDATA[
Ride Offered - Oakland/BART to LA/SFV - TODAY 3PM 11/04 (oakland north / temescal)
]]></title>
<link>
http://sfbay.craigslist.org/eby/rid/2685010755.html
</link>
<description><![CDATA[
Im offering a lift for up to two people from Oakland (or near any BART station in the East Bay/580/880 Corridor, or San Jose/Morgan Hill, Gilroy) to the San Fernando Valley / Los Angeles area. Specifically, Im leaving from Oakland between 2:30 and 3:00pm (this is flexible, but if I leave too late my girlfriend will kill me), and heading to Woodland Hills via the 580, I-5, 405, and 101.<!-- END CLTAGS -->
]]></description>
<dc:date>2011-11-04T09:34:54-07:00</dc:date>
<dc:language>en-us</dc:language>
<dc:rights>Copyright 2011 craigslist, inc.</dc:rights>
<dc:source>
http://sfbay.craigslist.org/eby/rid/2685010755.html
</dc:source>
<dc:title><![CDATA[
Ride Offered - Oakland/BART to LA/SFV - TODAY 3PM 11/04 (oakland north / temescal)
]]></dc:title>
<dc:type>text</dc:type>
<dcterms:issued>2011-11-04T09:34:54-07:00</dcterms:issued>
</item>
</rdf:RDF>

View File

@@ -0,0 +1,48 @@
<?xml version="1.0"?>
<!-- http://cyber.law.harvard.edu/rss/examples/rss2sample.xml -->
<rss version="2.0">
<channel>
<title>Liftoff News</title>
<link>http://liftoff.msfc.nasa.gov/</link>
<description>Liftoff to Space Exploration.</description>
<language>en-us</language>
<pubDate>Tue, 10 Jun 2003 04:00:00 GMT</pubDate>
<lastBuildDate>Tue, 10 Jun 2003 09:41:01 GMT</lastBuildDate>
<docs>http://blogs.law.harvard.edu/tech/rss</docs>
<generator>Weblog Editor 2.0</generator>
<managingEditor>editor@example.com</managingEditor>
<webMaster>webmaster@example.com</webMaster>
<item>
<title>Star City</title>
<link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>
<description>How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's &lt;a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm"&gt;Star City&lt;/a&gt;.</description>
<pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
<guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid>
</item>
<item>
<description>Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a &lt;a href="http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm"&gt;partial eclipse of the Sun&lt;/a&gt; on Saturday, May 31st.</description>
<pubDate>Fri, 30 May 2003 11:06:42 GMT</pubDate>
<guid>http://liftoff.msfc.nasa.gov/2003/05/30.html#item572</guid>
</item>
<item>
<title>The Engine That Does More</title>
<link>http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp</link>
<description>Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly. The proposed VASIMR engine would do that.</description>
<pubDate>Tue, 27 May 2003 08:37:32 GMT</pubDate>
<guid>http://liftoff.msfc.nasa.gov/2003/05/27.html#item571</guid>
</item>
<item>
<title>Astronauts' Dirty Laundry</title>
<link>http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp</link>
<description>Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them. Instead, astronauts have other options.</description>
<pubDate>Tue, 20 May 2003 08:56:02 GMT</pubDate>
<guid>http://liftoff.msfc.nasa.gov/2003/05/20.html#item570</guid>
</item>
</channel>
</rss>

View File

@@ -0,0 +1,44 @@
{
"name": "simple",
"options": {
"handler": {},
"parser": {}
},
"html": "<h1 class=test>adsf</h1>",
"expected": [
{
"event": "opentagname",
"data": [
"h1"
]
},
{
"event": "opentag",
"data": [
"h1",
{
"class": "test"
}
]
},
{
"event": "attribute",
"data": [
"class",
"test"
]
},
{
"event": "text",
"data": [
"adsf"
]
},
{
"event": "closetag",
"data": [
"h1"
]
}
]
}

View File

@@ -0,0 +1,62 @@
{
"name": "Template script tags",
"options": {
"handler": {},
"parser": {}
},
"html": "<script type=\"text/template\"><h1>Heading1</h1></script>",
"expected": [
{
"event": "opentagname",
"data": [
"script"
]
},
{
"event": "opentag",
"data": [
"script",
{
"type": "text/template"
}
]
},
{
"event": "attribute",
"data": [
"type",
"text/template"
]
},
{
"event": "text",
"data": [
"<h1"
]
},
{
"event": "text",
"data": [
">Heading1"
]
},
{
"event": "text",
"data": [
"</h1"
]
},
{
"event": "text",
"data": [
">"
]
},
{
"event": "closetag",
"data": [
"script"
]
}
]
}

Some files were not shown because too many files have changed in this diff Show More