all da files

This commit is contained in:
jllord
2013-05-27 13:45:59 -07:00
commit 59d3d30afa
6704 changed files with 1954956 additions and 0 deletions

8
node_modules/cheerio/.npmignore generated vendored Normal file
View File

@@ -0,0 +1,8 @@
src/
support/
tests/
examples/
*.sock
*.tmproj
coverage.html
lib-cov

4
node_modules/cheerio/.travis.yml generated vendored Normal file
View File

@@ -0,0 +1,4 @@
language: node_js
node_js:
- 0.6
- 0.8

223
node_modules/cheerio/History.md generated vendored Normal file
View File

@@ -0,0 +1,223 @@
0.10.8 / 2013-03-11
==================
* Add slice method (SBoudrias)
0.10.7 / 2013-02-10
==================
* Code & doc cleanup (davidchambers)
* Fixed bug in filter (jugglinmike)
0.10.6 / 2013-01-29
==================
* Added `$.contains(...)` (jugglinmike)
* formatting cleanup (davidchambers)
* Bug fix for `.children()` (jugglinmike & davidchambers)
* Remove global `render` bug (wvl)
0.10.5 / 2012-12-18
===================
* Fixed botched publish from 0.10.4 - changes should now be present
0.10.4 / 2012-12-16
==================
* $.find should query descendants only (@jugglinmike)
* Tighter underscore dependency
0.10.3 / 2012-11-18
===================
* fixed outer html bug
* Updated documentation for $(...).html() and $.html()
0.10.2 / 2012-11-17
===================
* Added a toString() method (@bensheldon)
* use `_.each` and `_.map` to simplify cheerio namesakes (@davidchambers)
* Added filter() with tests and updated readme (@bensheldon & @davidchambers)
* Added spaces between attributes rewritten by removeClass (@jos3000)
* updated docs to remove reference to size method (@ironchefpython)
* removed tidy from cheerio
0.10.1 / 2012-10-04
===================
* Fixed regression, filtering with a context (#106)
0.10.0 / 2012-09-24
===================
* Greatly simplified and reorganized the library, reducing the loc by 30%
* Now supports mocha's test-coverage
* Deprecated self-closing tags (HTML5 doesn't require them)
* Fixed error thrown in removeClass(...) @robashton
0.9.2 / 2012-08-10
==================
* added $(...).map(fn)
* manipulation: refactor `makeCheerioArray`
* make .removeClass() remove *all* occurrences (#64)
0.9.1 / 2012-08-03
==================
* fixed bug causing options not to make it to the parser
0.9.0 / 2012-07-24
==================
* Added node 0.8.x support
* Removed node 0.4.x support
* Add html(dom) support (@wvl)
* fixed xss vulnerabilities on .attr(), .text(), & .html() (@benatkin, @FB55)
* Rewrote tests into javascript, removing coffeescript dependency (@davidchambers)
* Tons of cleanup (@davidchambers)
0.8.3 / 2012-06-12
==================
* Fixed minor package regression (closes #60)
0.8.2 / 2012-06-11
==================
* Now fails gracefully in cases that involve special chars, which is in line with jQuery (closes #59)
* text() now decodes special entities (closes #52)
* updated travis.yml to test node 4.x
0.8.1 / 2012-06-02
==================
* fixed regression where if you created an element, it would update the root
* compatible with node 4.x (again)
0.8.0 / 2012-05-27
==================
* Updated CSS parser to use FB55/CSSselect. Cheerio now supports most CSS3 pseudo selectors thanks to @FB55.
* ignoreWhitespace now on by default again. See #55 for context.
* Changed $(':root') to $.root(), cleaned up $.clone()
* Support for .eq(i) thanks to @alexbardas
* Removed support for node 0.4.x
* Fixed memory leak where package.json was continually loaded
* Tons more tests
0.7.0 / 2012-04-08
==================
* Now testing with node v0.7.7
* Added travis-ci integration
* Replaced should.js with expect.js. Browser testing to come
* Fixed spacing between attributes and their values
* Added HTML pretty print
* Exposed node-htmlparser2 parsing options
* Revert .replaceWith(...) to be consistent with jQuery
0.6.2 / 2012-02-12
==================
* Fixed .replaceWith(...) regression
0.6.1 / 2012-02-12
==================
* Added .first(), .last(), and .clone() commands.
* Option to parse using whitespace added to `.load`.
* Many bug fixes to make cheerio more aligned with jQuery.
* Added $(':root') to select the highest level element.
Many thanks to the contributors that made this release happen: @ironchefpython and @siddMahen
0.6.0 / 2012-02-07
==================
* *Important:* `$(...).html()` now returns inner HTML, which is in line with the jQuery spec
* `$.html()` returns the full HTML string. `$.html([cheerioObject])` will return the outer(selected element's tag) and inner HTML of that object
* Fixed bug that prevented HTML strings with depth (eg. `append('<ul><li><li></ul>')`) from getting `parent`, `next`, `prev` attributes.
* Halted [htmlparser2](https://github.com/FB55/node-htmlparser) at v2.2.2 until single attributes bug gets fixed.
0.5.1 / 2012-02-05
==================
* Fixed minor regression: $(...).text(fn) would fail
0.5.1 / 2012-02-05
==================
* Fixed regression: HTML pages with comments would fail
0.5.0 / 2012-02-04
==================
* Transitioned from Coffeescript back to Javascript
* Parser now ignores whitespace
* Fixed issue with double slashes on self-enclosing tags
* Added boolean attributes to html rendering
0.4.2 / 2012-01-16
==================
* Multiple selectors support: $('.apple, .orange'). Thanks @siddMahen!
* Update package.json to always use latest cheerio-soupselect
* Fix memory leak in index.js
0.4.1 / 2011-12-19
==================
* Minor packaging changes to allow `make test` to work from npm installation
0.4.0 / 2011-12-19
==================
* Rewrote all unit tests as cheerio transitioned from vows -> mocha
* Internally, renderer.render -> render(...), parser.parse -> parse(...)
* Append, prepend, html, before, after all work with only text (no tags)
* Bugfix: Attributes can now be removed from script and style tags
* Added yield as a single tag
* Cheerio now compatible with node >=0.4.7
0.3.2 / 2011-12-1
=================
* Fixed $(...).text(...) to work with "root" element
0.3.1 / 2011-11-25
==================
* Now relying on cheerio-soupselect instead of node-soupselect
* Removed all lingering htmlparser dependencies
* parser now returns parent "root" element. Root now never needs to be updated when there are multiple roots. This fixes ongoing issues with before(...), after(...) and other manipulation functions
* Added jQuery's $(...).replaceWith(...)
0.3.0 / 2011-11-19
==================
* Now using htmlparser2 for parsing (2x speed increase, cleaner, actively developed)
* Added benchmark directory for future speed tests
* $('...').dom() was funky, so it was removed in favor of $('...').get(). $.dom() still works the same.
* $.root now correctly static across all instances of $
* Added a screencast
0.2.2 / 2011-11-9
=================
* Traversing will select `<script>` and `<style>` tags (Closes Issue: #8)
* .text(string) now working with empty elements (Closes Issue: #7)
* Fixed before(...) & after(...) again if there is no parent (Closes Issue: #2)
0.2.1 / 2011-11-5
=================
* Fixed before(...) & after(...) if there is no parent (Closes Issue: #2)
* Comments now rendered correctly (Closes Issue: #5)
< 0.2.0 / 2011-10-31
====================
* Initial release (untracked development)

18
node_modules/cheerio/Makefile generated vendored Normal file
View File

@@ -0,0 +1,18 @@
# Mocha reporter used by the `test` target (the `test-cov` target overrides it)
REPORTER = dot
# Run the mocha test suite with the selected reporter
test:
@./node_modules/mocha/bin/mocha --reporter $(REPORTER)
# Install the npm dependencies
setup:
@npm install
# Open the project sources with the `subl` command
subl:
@subl lib/ test/ package.json index.js
# Re-run the tests with CHEERIO_COV=1 and the html-cov reporter, writing coverage.html
test-cov: lib-cov
@CHEERIO_COV=1 $(MAKE) test REPORTER=html-cov > coverage.html
# Generate the lib-cov directory from lib with jscoverage
lib-cov:
@jscoverage lib lib-cov
# NOTE(review): `build` has no rule in this file and `test-cov` is not listed here — confirm
.PHONY: test build setup subl

587
node_modules/cheerio/Readme.md generated vendored Normal file
View File

@@ -0,0 +1,587 @@
# cheerio [![Build Status](https://secure.travis-ci.org/MatthewMueller/cheerio.png?branch=master)](http://travis-ci.org/MatthewMueller/cheerio)
Fast, flexible, and lean implementation of core jQuery designed specifically for the server.
## Introduction
Teach your server HTML.
```js
var cheerio = require('cheerio'),
$ = cheerio.load('<h2 class = "title">Hello world</h2>');
$('h2.title').text('Hello there!');
$('h2').addClass('welcome');
$.html();
//=> <h2 class = "title welcome">Hello there!</h2>
```
## Installation
`npm install cheerio`
## Features
__&#10084; Familiar syntax:__
Cheerio implements a subset of core jQuery. Cheerio removes all the DOM inconsistencies and browser cruft from the jQuery library, revealing its truly gorgeous API.
__&#991; Blazingly fast:__
Cheerio works with a very simple, consistent DOM model. As a result parsing, manipulating, and rendering are incredibly efficient. Preliminary end-to-end benchmarks suggest that cheerio is about __8x__ faster than JSDOM.
__&#10049; Insanely flexible:__
Cheerio wraps around @FB55's forgiving htmlparser. Cheerio can parse nearly any HTML or XML document.
## What about JSDOM?
I wrote cheerio because I found myself increasingly frustrated with JSDOM. For me, there were three main sticking points that I kept running into again and again:
__&#8226; JSDOM's built-in parser is too strict:__
JSDOM's bundled HTML parser cannot handle many popular sites out there today.
__&#8226; JSDOM is too slow:__
Parsing big websites with JSDOM has a noticeable delay.
__&#8226; JSDOM feels too heavy:__
The goal of JSDOM is to provide an identical DOM environment as what we see in the browser. I never really needed all this, I just wanted a simple, familiar way to do HTML manipulation.
## When I would use JSDOM
Cheerio will not solve all your problems. I would still use JSDOM if I needed to work in a browser-like environment on the server, particularly if I wanted to automate functional tests.
## API
### Markup example we'll be using:
```html
<ul id="fruits">
<li class="apple">Apple</li>
<li class="orange">Orange</li>
<li class="pear">Pear</li>
</ul>
```
This is the HTML markup we will be using in all of the API examples.
### Loading
First you need to load in the HTML. This step in jQuery is implicit, since jQuery operates on the one, baked-in DOM. With Cheerio, we need to pass in the HTML document.
This is the _preferred_ method:
```js
var cheerio = require('cheerio'),
$ = cheerio.load('<ul id = "fruits">...</ul>');
```
Optionally, you can also load in the HTML by passing the string as the context:
```js
$ = require('cheerio');
$('ul', '<ul id = "fruits">...</ul>');
```
Or as the root:
```js
$ = require('cheerio');
$('li', 'ul', '<ul id = "fruits">...</ul>');
```
You can also pass an extra object to `.load()` if you need to modify any
of the default parsing options:
```js
$ = cheerio.load('<ul id = "fruits">...</ul>', {
ignoreWhitespace: true,
xmlMode: true
});
```
These parsing options are taken directly from htmlparser, therefore any options that can be used in htmlparser
are valid in cheerio as well. The default options are:
```js
{
ignoreWhitespace: false,
xmlMode: false,
lowerCaseTags: false
}
```
For a list of options and their effects, see [this](https://github.com/FB55/node-htmlparser/wiki/DOMHandler) and
[this](https://github.com/FB55/node-htmlparser/wiki/Parser-options).
### Selectors
Cheerio's selector implementation is nearly identical to jQuery's, so the API is very similar.
#### $( selector, [context], [root] )
`selector` searches within the `context` scope which searches within the `root` scope. `selector` and `context` can be a string expression, DOM Element, array of DOM elements, or cheerio object. `root` is typically the HTML document string.
This selector method is the starting point for traversing and manipulating the document. Like jQuery, it's the primary method for selecting elements in the document, but unlike jQuery it's built on top of the CSSSelect library, which implements most of the Sizzle selectors.
```js
$('.apple', '#fruits').text()
//=> Apple
$('ul .pear').attr('class')
//=> pear
$('li[class=orange]').html()
//=> Orange
```
### Attributes
Methods for getting and modifying attributes.
#### .attr( name, value )
Method for getting and setting attributes. Gets the attribute value for only the first element in the matched set. If you set an attribute's value to `null`, you remove that attribute. You may also pass a `map` and `function` like jQuery.
```js
$('ul').attr('id')
//=> fruits
$('.apple').attr('id', 'favorite').html()
//=> <li class = "apple" id = "favorite">Apple</li>
```
> See http://api.jquery.com/attr/ for more information
#### .removeAttr( name )
Method for removing attributes by `name`.
```js
$('.pear').removeAttr('class').html()
//=> <li>Pear</li>
```
#### .hasClass( className )
Check to see if *any* of the matched elements have the given `className`.
```js
$('.pear').hasClass('pear')
//=> true
$('.apple').hasClass('fruit')
//=> false
$('li').hasClass('pear')
//=> true
```
#### .addClass( className )
Adds class(es) to all of the matched elements. Also accepts a `function` like jQuery.
```js
$('.pear').addClass('fruit').html()
//=> <li class = "pear fruit">Pear</li>
$('.apple').addClass('fruit red').html()
//=> <li class = "apple fruit red">Apple</li>
```
> See http://api.jquery.com/addClass/ for more information.
#### .removeClass( [className] )
Removes one or more space-separated classes from the selected elements. If no `className` is defined, all classes will be removed. Also accepts a `function` like jQuery.
```js
$('.pear').removeClass('pear').html()
//=> <li class = "">Pear</li>
$('.apple').addClass('red').removeClass().html()
//=> <li class = "">Apple</li>
```
> See http://api.jquery.com/removeClass/ for more information.
### Traversing
#### .find(selector)
Get a set of descendants filtered by `selector` of each element in the current set of matched elements.
```js
$('#fruits').find('li').length
//=> 3
```
#### .parent()
Gets the parent of the first selected element.
```js
$('.pear').parent().attr('id')
//=> fruits
```
#### .next()
Gets the next sibling of the first selected element.
```js
$('.apple').next().hasClass('orange')
//=> true
```
#### .prev()
Gets the previous sibling of the first selected element.
```js
$('.orange').prev().hasClass('apple')
//=> true
```
#### .slice( start, [end] )
Gets the elements matching the specified range
```js
$('li').slice(1).eq(0).text()
//=> 'Orange'
$('li').slice(1, 2).length
//=> 1
```
#### .siblings()
Gets the first selected element's siblings, excluding itself.
```js
$('.pear').siblings().length
//=> 2
```
#### .children( selector )
Gets the children of each selected element, optionally filtered by `selector`.
```js
$('#fruits').children().length
//=> 3
$('#fruits').children('.pear').text()
//=> Pear
```
#### .each( function(index, element) )
Iterates over a cheerio object, executing a function for each matched element. When the callback is fired, the function is fired in the context of the DOM element, so `this` refers to the current element, which is equivalent to the function parameter `element`. To break out of the `each` loop early, return with `false`.
```js
var fruits = [];
$('li').each(function(i, elem) {
fruits[i] = $(this).text();
});
fruits.join(', ');
//=> Apple, Orange, Pear
```
#### .map( function(index, element) )
Iterates over a cheerio object, executing a function for each selected element. Map will return an `array` of return values from each of the functions it iterated over. The function is fired in the context of the DOM element, so `this` refers to the current element, which is equivalent to the function parameter `element`.
```js
$('li').map(function(i, el) {
// this === el
return $(this).attr('class');
}).join(', ');
//=> apple, orange, pear
```
#### .filter( selector ) <br /> .filter( function(index) )
Iterates over a cheerio object, reducing the set of selected elements to those that match the selector or pass the function's test. If using the function method, the function is executed in the context of the selected element, so `this` refers to the current element.
Selector:
```js
$('li').filter('.orange').attr('class');
//=> orange
```
Function:
```js
$('li').filter(function(i, el) {
// this === el
return $(this).attr('class') === 'orange';
}).attr('class')
//=> orange
```
#### .first()
Will select the first element of a cheerio object
```js
$('#fruits').children().first().text()
//=> Apple
```
#### .last()
Will select the last element of a cheerio object
```js
$('#fruits').children().last().text()
//=> Pear
```
#### .eq( i )
Reduce the set of matched elements to the one at the specified index. Use `.eq(-i)` to count backwards from the last selected element.
```js
$('li').eq(0).text()
//=> Apple
$('li').eq(-1).text()
//=> Pear
```
### Manipulation
Methods for modifying the DOM structure.
#### .append( content, [content, ...] )
Inserts content as the *last* child of each of the selected elements.
```js
$('ul').append('<li class = "plum">Plum</li>')
$.html()
//=> <ul id = "fruits">
// <li class = "apple">Apple</li>
// <li class = "orange">Orange</li>
// <li class = "pear">Pear</li>
// <li class = "plum">Plum</li>
// </ul>
```
#### .prepend( content, [content, ...] )
Inserts content as the *first* child of each of the selected elements.
```js
$('ul').prepend('<li class = "plum">Plum</li>')
$.html()
//=> <ul id = "fruits">
// <li class = "plum">Plum</li>
// <li class = "apple">Apple</li>
// <li class = "orange">Orange</li>
// <li class = "pear">Pear</li>
// </ul>
```
#### .after( content, [content, ...] )
Insert content next to each element in the set of matched elements.
```js
$('.apple').after('<li class = "plum">Plum</li>')
$.html()
//=> <ul id = "fruits">
// <li class = "apple">Apple</li>
// <li class = "plum">Plum</li>
// <li class = "orange">Orange</li>
// <li class = "pear">Pear</li>
// </ul>
```
#### .before( content, [content, ...] )
Insert content previous to each element in the set of matched elements.
```js
$('.apple').before('<li class = "plum">Plum</li>')
$.html()
//=> <ul id = "fruits">
// <li class = "plum">Plum</li>
// <li class = "apple">Apple</li>
// <li class = "orange">Orange</li>
// <li class = "pear">Pear</li>
// </ul>
```
#### .remove( [selector] )
Removes the set of matched elements from the DOM and all their children. `selector` filters the set of matched elements to be removed.
```js
$('.pear').remove()
$.html()
//=> <ul id = "fruits">
// <li class = "apple">Apple</li>
// <li class = "orange">Orange</li>
// </ul>
```
#### .replaceWith( content )
Replaces matched elements with `content`.
```js
var plum = $('<li class = "plum">Plum</li>')
$('.pear').replaceWith(plum)
$.html()
//=> <ul id = "fruits">
// <li class = "apple">Apple</li>
// <li class = "orange">Orange</li>
// <li class = "plum">Plum</li>
// </ul>
```
#### .empty()
Empties an element, removing all its children.
```js
$('ul').empty()
$.html()
//=> <ul id = "fruits"></ul>
```
#### .html( [htmlString] )
Gets an html content string from the first selected element. If `htmlString` is specified, each selected element's content is replaced by the new content.
```js
$('.orange').html()
//=> Orange
$('#fruits').html('<li class = "mango">Mango</li>').html()
//=> <li class="mango">Mango</li>
```
#### .text( [textString] )
Get the combined text contents of each element in the set of matched elements, including their descendants. If `textString` is specified, each selected element's content is replaced by the new text content.
```js
$('.orange').text()
//=> Orange
$('ul').text()
//=> Apple
// Orange
// Pear
```
### Rendering
When you're ready to render the document, you can use the `html` utility function:
```js
$.html()
//=> <ul id = "fruits">
// <li class = "apple">Apple</li>
// <li class = "orange">Orange</li>
// <li class = "pear">Pear</li>
// </ul>
```
If you want to return the outerHTML you can use `$.html(selector)`:
```js
$.html('.pear')
//=> <li class = "pear">Pear</li>
```
### Miscellaneous
DOM element methods that don't fit anywhere else
#### .toArray()
Retrieve all the DOM elements contained in the jQuery set, as an array.
```js
$('li').toArray()
//=> [ {...}, {...}, {...} ]
```
#### .clone() ####
Clone the cheerio object.
```js
var moreFruit = $('#fruits').clone()
```
### Utilities
#### $.root
Sometimes you need to work with the top-level root element. To query it, you can use `$.root()`.
```js
$.root().append('<ul id="vegetables"></ul>').html();
//=> <ul id="fruits">...</ul><ul id="vegetables"></ul>
```
#### $.contains( container, contained )
Checks to see if the `contained` DOM element is a descendant of the `container` DOM element.
## Screencasts
http://vimeo.com/31950192
> This video tutorial is a follow-up to Nettut's "How to Scrape Web Pages with Node.js and jQuery", using cheerio instead of JSDOM + jQuery. This video shows how easy it is to use cheerio and how much faster cheerio is than JSDOM + jQuery.
## Test Coverage
Cheerio has high test coverage; you can view the report [here](https://s3.amazonaws.com/MattMueller/Coverage/cheerio.html).
## Testing
To run the test suite, download the repository, then within the cheerio directory, run:
```shell
make setup
make test
```
This will download the development packages and run the test suite.
## Contributors
These are some of the contributors that have made cheerio possible:
```
project : cheerio
repo age : 1 year, 4 months ago
commits : 416
active : 118 days
files : 26
authors :
278 Matt Mueller 66.8%
68 Matthew Mueller 16.3%
27 David Chambers 6.5%
15 Siddharth Mahendraker 3.6%
7 ironchefpython 1.7%
5 Jos Shepherd 1.2%
5 Ben Sheldon 1.2%
2 alexbardas 0.5%
2 Rob Ashton 0.5%
1 mattym 0.2%
1 Chris O'Hara 0.2%
1 Mike Pennisi 0.2%
1 Rob "Hurricane" Ashton 0.2%
1 Sindre Sorhus 0.2%
1 Wayne Larsen 0.2%
1 Ben Atkin 0.2%
```
## Special Thanks
This library stands on the shoulders of some incredible developers. A special thanks to:
__&#8226; @FB55 for node-htmlparser2 & CSSSelect:__
Felix has a knack for writing speedy parsing engines. He completely re-wrote both @tautologistic's `node-htmlparser` and @harry's `node-soupselect` from the ground up, making both of them much faster and more flexible. Cheerio would not be possible without his foundational work.
__&#8226; @jQuery team for jQuery:__
The core API is the best of its class and despite dealing with all the browser inconsistencies the code base is extremely clean and easy to follow. Much of cheerio's implementation and documentation is from jQuery. Thanks guys.
__&#8226; @visionmedia:__
The style, the structure, the open-source"-ness" of this library comes from studying TJ's style and using many of his libraries. This dude consistently pumps out high-quality libraries and has always been more than willing to help or answer questions. You rock TJ.
## License
(The MIT License)
Copyright (c) 2012 Matt Mueller &lt;mattmuelle@gmail.com&gt;
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
'Software'), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

14
node_modules/cheerio/benchmarks/htmlparser.js generated vendored Normal file
View File

@@ -0,0 +1,14 @@
/*
  Benchmark: fetch a live page and report how long `cheerio.load` takes on it
*/

var request = require('request');
var cheerio = require('cheerio');

request('http://yahoo.com', function(err, response, body) {
  // Nothing to measure when the request failed or did not return 200
  if (err || response.statusCode !== 200) return;

  var startedAt = new Date();
  var $ = cheerio.load(body);
  var finishedAt = new Date();

  console.log('ops took: ' + (finishedAt.getTime() - startedAt.getTime()) + ' ms');
});

13
node_modules/cheerio/index.js generated vendored Normal file
View File

@@ -0,0 +1,13 @@
/**
 * Export cheerio: `./lib-cov/cheerio` when the CHEERIO_COV environment
 * variable is set, `./lib/cheerio` otherwise
 */

var cheerio;

if (process.env.CHEERIO_COV) {
  cheerio = require('./lib-cov/cheerio');
} else {
  cheerio = require('./lib/cheerio');
}

exports = module.exports = cheerio;

/*
  Export the version read from package.json
*/

exports.version = require('./package').version;

149
node_modules/cheerio/lib/api/attributes.js generated vendored Normal file
View File

@@ -0,0 +1,149 @@
var _ = require('underscore'),
utils = require('../utils'),
isTag = utils.isTag,
decode = utils.decode,
encode = utils.encode,
rspace = /\s+/,
// Attributes that are booleans
rboolean = /^(?:autofocus|autoplay|async|checked|controls|defer|disabled|hidden|loop|multiple|open|readonly|required|scoped|selected)$/i;
/*
  Set attribute(s) on a single element and return its attribs object.

  - `name` as an object: merged as-is into `el.attribs`
  - `value` of `null`: the attribute is removed via `removeAttribute`
  - anything else: the encoded value is stored under `name`
*/
var setAttr = function(el, name, value) {
  // Attribute map: merge and return straight away
  if (typeof name === 'object') {
    return _.extend(el.attribs, name);
  }

  if (value !== null) {
    el.attribs[name] = encode(value);
  } else {
    removeAttribute(el, name);
  }

  return el.attribs;
};
/*
  Get or set attributes.

  - attr()             => attribs object of the first element (values decoded in place)
  - attr(name)         => decoded value of `name` on the first element, or undefined
  - attr(name, value)  => set on every element in the set, returns `this`
  - attr(map)          => set each entry on every element, returns `this`

  Returns undefined when the first element is missing or is not a tag.
*/
var attr = exports.attr = function(name, value) {
  var first = this[0],
      isSetter = typeof name === 'object' || value !== undefined,
      key;

  if (!(first && isTag(first))) return undefined;

  if (!first.attribs) first.attribs = {};

  // No attribute specified: hand back the whole attribs object
  if (!name) {
    for (key in first.attribs) {
      first.attribs[key] = decode(first.attribs[key]);
    }
    return first.attribs;
  }

  // Setter (single value or attribute map)
  if (isSetter) {
    this.each(function(i, el) {
      el.attribs = setAttr(el, name, value);
    });
    return this;
  }

  // Getter: only own attributes are reported
  if (Object.hasOwnProperty.call(first.attribs, name)) {
    return decode(first.attribs[name]);
  }
};
/**
 * Remove the attribute `name` from a single element.
 *
 * Does nothing unless the element's type passes `isTag`, it has an attribs
 * object, and `name` is an own property of it. When the attribute's current
 * value matches `rboolean` it is set to `false`; otherwise it is deleted.
 *
 * NOTE(review): `rboolean` is tested against the attribute's value, not its
 * name — confirm this is intended.
 */

var removeAttribute = function(elem, name) {
  var removable = isTag(elem.type) &&
                  elem.attribs &&
                  Object.hasOwnProperty.call(elem.attribs, name);

  if (!removable) return;

  if (rboolean.test(elem.attribs[name])) {
    elem.attribs[name] = false;
  } else {
    delete elem.attribs[name];
  }
};
/*
  Remove the attribute `name` from every element in the set
*/
var removeAttr = exports.removeAttr = function(name) {
  var strip = function(i, elem) {
    removeAttribute(elem, name);
  };

  this.each(strip);

  return this;
};
/*
  Check whether any element in the set carries `className`
*/
var hasClass = exports.hasClass = function(className) {
  var carriesClass = function(elem) {
    var attribs = elem.attribs,
        names;

    // Elements without attribs can never match
    if (!attribs) return attribs;

    names = (attribs['class'] || '').split(rspace);
    return _.contains(names, className);
  };

  return _.any(this, carriesClass);
};
/*
  Add one or more space-separated classes to every tag in the set.

  `value` may also be a function; it is called per element with
  (index, currentClassName) and its result is passed back to addClass.
  Returns `this`.
*/
var addClass = exports.addClass = function(value) {
  var names, total, idx, k, $el, current, merged;

  // Function support
  if (_.isFunction(value)) {
    this.each(function(i) {
      var existing = this.attr('class') || '';
      this.addClass(value.call(this, i, existing));
    });
  }

  // Only a non-empty string is processed below
  if (!(value && _.isString(value))) return this;

  names = value.split(rspace);
  total = this.length;

  for (idx = 0; idx < total; idx++) {
    $el = this.make(this[idx]);

    // Skip anything that is not a tag
    if (!isTag(this[idx])) continue;

    current = $el.attr('class');

    // No classes yet: the new ones become the class attribute
    if (!current) {
      $el.attr('class', names.join(' ').trim());
      continue;
    }

    // Pad with spaces so whole class names can be matched
    merged = ' ' + current + ' ';
    for (k = 0; k < names.length; k++) {
      if (merged.indexOf(' ' + names[k] + ' ') === -1) {
        merged += names[k] + ' ';
      }
    }

    $el.attr('class', merged.trim());
  }

  return this;
};
/*
  Remove one or more space-separated classes from every tag in the set.

  - removeClass()          => clears the class attribute
  - removeClass('a b')     => removes the listed classes
  - removeClass(function)  => the function is called per tag with
                              (index, currentClassName) and its result is
                              passed back to removeClass for that element

  Returns `this`.
*/
var removeClass = exports.removeClass = function(value) {
  var split = function(className) {
    return className ? className.trim().split(rspace) : [];
  };

  // Handle a function before anything else: `split` expects a string and
  // would throw on `value.trim()` if it were handed the function itself
  if (_.isFunction(value)) {
    return this.each(function(i, el) {
      // Non-tag nodes may not have attribs; skip them like the branch below does
      if (!isTag(el)) return;
      this.removeClass(value.call(this, i, el.attribs['class'] || ''));
    });
  }

  var classes = split(value);

  return this.each(function(i, el) {
    if (!isTag(el)) return;
    el.attribs['class'] = (!value) ? '' : _.reject(
      split(el.attribs['class']),
      function(name) { return _.contains(classes, name); }
    ).join(' ');
  });
};

192
node_modules/cheerio/lib/api/manipulation.js generated vendored Normal file
View File

@@ -0,0 +1,192 @@
var _ = require('underscore'),
parse = require('../parse'),
$ = require('../static'),
updateDOM = parse.update,
evaluate = parse.evaluate,
encode = require('../utils').encode,
slice = Array.prototype.slice;
/*
  Flattens a list of inputs into a single array:
  cheerio objects contribute their `toArray()` result,
  anything else is passed through `evaluate`
*/
var makeCheerioArray = function(elems) {
  var dom = [];

  _.each(elems, function(elem) {
    var nodes = elem.cheerio ? elem.toArray() : evaluate(elem);
    dom = dom.concat(nodes);
  });

  return dom;
};
/*
  Build an insertion method (used for append/prepend).

  `concatenator(dom, children)` decides how the new nodes are combined with
  an element's existing children; the result is handed to `updateDOM`.
*/
var _insert = function(concatenator) {
  return function() {
    var args = slice.call(arguments),
        nodes = makeCheerioArray(args),
        givenFunction = _.isFunction(args[0]);

    return this.each(function(i, el) {
      if (givenFunction) return el; // not yet supported

      // Make sure the element has a children array to combine with
      if (!el.children) el.children = [];

      updateDOM(concatenator(nodes, el.children), el);
    });
  };
};
/*
  Insert content after the existing children of each element
*/
var append = exports.append = _insert(function(dom, children) {
  var combined = children.concat(dom);
  return combined;
});
/*
  Insert content before the existing children of each element
*/
var prepend = exports.prepend = _insert(function(dom, children) {
  var combined = dom.concat(children);
  return combined;
});
/*
  Insert content directly after each element in the set
*/
var after = exports.after = function() {
  var nodes = makeCheerioArray(slice.call(arguments));

  this.each(function(i, el) {
    var parent = el.parent,
        siblings = parent.children,
        position = siblings.indexOf(el);

    // Element is not among its parent's children: move on
    if (position === -1) return;

    // Splice the new nodes in right behind `el`
    siblings.splice.apply(siblings, [position + 1, 0].concat(nodes));

    // Update next, prev, and parent pointers
    updateDOM(siblings, parent);
    parent.children = siblings;
  });

  return this;
};
/*
  Insert content directly before each element in the set
*/
var before = exports.before = function() {
  var nodes = makeCheerioArray(slice.call(arguments));

  this.each(function(i, el) {
    var parent = el.parent,
        siblings = parent.children,
        position = siblings.indexOf(el);

    // Element is not among its parent's children: move on
    if (position === -1) return;

    // Splice the new nodes in right in front of `el`
    siblings.splice.apply(siblings, [position, 0].concat(nodes));

    // Update next, prev, and parent pointers
    updateDOM(siblings, parent);
    parent.children = siblings;
  });

  return this;
};
/*
  remove([selector])

  Detach each element in the set (narrowed by `selector` when given)
  from its parent's children. Returns the original set.
*/
var remove = exports.remove = function(selector) {
  // Filter if we have a selector
  var targets = selector ? this.filter(selector) : this;

  targets.each(function(i, el) {
    var parent = el.parent,
        siblings = parent.children,
        position = siblings.indexOf(el);

    if (position === -1) return;

    siblings.splice(position, 1);

    // Update next, prev, and parent pointers
    updateDOM(siblings, parent);
    parent.children = siblings;
  });

  return this;
};
/*
  Replace each element in the set with `content`
  (a cheerio object, or anything `evaluate` accepts)
*/
var replaceWith = exports.replaceWith = function(content) {
  var replacement = content.cheerio ? content.toArray() : evaluate(content);

  this.each(function(i, el) {
    var parent = el.parent,
        siblings = parent.children,
        position = siblings.indexOf(el);

    if (position === -1) return;

    // Swap `el` out for the replacement nodes
    siblings.splice.apply(siblings, [position, 1].concat(replacement));

    // Update next, prev, and parent pointers
    updateDOM(siblings, parent);
    parent.children = siblings;
  });

  return this;
};
/*
  Drop all children of every element in the set
*/
var empty = exports.empty = function() {
  var clearChildren = function(i, el) {
    el.children = [];
  };

  this.each(clearChildren);

  return this;
};
/**
 * Set/Get the HTML
 *
 * Without an argument: renders the children of the first element, or
 * returns null when there is no first element or it has no children.
 * With an argument: replaces the children of every element and returns `this`.
 */

var html = exports.html = function(str) {
  var first, nodes;

  // Getter
  if (str === undefined) {
    first = this[0];
    return (first && first.children) ? $.html(first.children) : null;
  }

  // Setter
  nodes = str.cheerio ? str.toArray() : evaluate(str);

  this.each(function(i, el) {
    el.children = nodes;
    updateDOM(el.children, el);
  });

  return this;
};
/*
  Render the whole set through the static `$.html`
*/
var toString = exports.toString = function() {
  var rendered = $.html(this);
  return rendered;
};
/*
  Get or set the text content.

  - text() / text(falsy) / text(object) => returns `$.text(this)`
  - text(function)  => the function is called per element with
                       (index, currentText) and its result is passed back
                       to text() for that element
  - text(string)    => replaces the children of every element with a single
                       text node holding the encoded string, returns `this`
*/
var text = exports.text = function(str) {
  // If `str` blank or an object
  if (!str || typeof str === 'object') {
    return $.text(this);
  } else if (_.isFunction(str)) {
    // Function support
    return this.each(function(i, el) {
      return this.text(str.call(el, i, this.text()));
    });
  }

  // Encode once; the value is the same for every element
  var data = encode(str);

  // Give each selected element its own text node. Sharing one node object
  // across elements would leave its `parent` pointing at the last element only.
  this.each(function(i, el) {
    el.children = [{
      data: data,
      type: 'text',
      parent: null,
      prev: null,
      next: null,
      children: []
    }];
    updateDOM(el.children, el);
  });

  return this;
};
/*
  Clone the set by rendering it to HTML and building a new object from
  that markup, which seems to be the easiest way to reconnect everything
*/
var clone = exports.clone = function() {
  var markup = $.html(this);
  return this.constructor(markup);
};

116
node_modules/cheerio/lib/api/traversing.js generated vendored Normal file
View File

@@ -0,0 +1,116 @@
var _ = require('underscore'),
select = require('cheerio-select'),
utils = require('../utils'),
isTag = utils.isTag;
/**
 * Run `selector` against the element children of the set and wrap the
 * matches. A falsy selector returns the set itself; any error thrown while
 * selecting yields an empty set.
 */
var find = exports.find = function(selector) {
  if (!selector) return this;

  try {
    var roots = [].slice.call(this.children());
    return this.make(select(selector, roots));
  } catch (err) {
    return this.make([]);
  }
};
/**
 * Wrap the parent of the first element. When the set is empty or the first
 * element has no parent, the set itself is returned.
 */
var parent = exports.parent = function(elem) {
  var first = this[0];
  if (!first || !first.parent) return this;
  return this.make(first.parent);
};
/**
 * Wrap the closest following sibling of the first element that is a tag.
 * Returns the set itself when it is empty or no such sibling exists.
 */
var next = exports.next = function(elem) {
  if (!this[0]) return this;

  for (var node = this[0].next; node; node = node.next) {
    if (isTag(node)) return this.make(node);
  }

  return this;
};
/**
 * Wrap the closest preceding sibling of the first element that is a tag.
 * Returns the set itself when it is empty or no such sibling exists.
 */
var prev = exports.prev = function(elem) {
  if (!this[0]) return this;

  for (var node = this[0].prev; node; node = node.prev) {
    if (isTag(node)) return this.make(node);
  }

  return this;
};
/**
 * Wrap every tag sibling of the first element (the element itself is
 * excluded). An empty set is returned unchanged; an element without a
 * parent has no siblings and produces an empty set.
 *
 * The sibling list is read straight from the parent node: `this.parent()`
 * falls back to returning the set itself when there is no parent, which
 * would make the element's own children look like its siblings.
 */
var siblings = exports.siblings = function(elem) {
  var self = this[0];
  if (!self) return this;

  var container = self.parent,
      pool = (container && container.children) || [];

  return this.make(_.filter(pool, function(node) {
    return node !== self && isTag(node);
  }));
};
/**
 * Collect the tag children of every element in the set.
 *
 * - No argument: all tag children.
 * - Number: only the child at that position in the combined list; an
 *   index with no element (out of range, negative, NaN) gives an empty set
 *   instead of throwing.
 * - Anything else: the children narrowed down with `.filter(selector)`.
 */
var children = exports.children = function(selector) {
  var elems = _.reduce(this, function(memo, elem) {
    return memo.concat(_.filter(elem.children, isTag));
  }, []);

  if (selector === undefined) return this.make(elems);

  if (_.isNumber(selector)) {
    var picked = elems[selector];
    // `make` dereferences its argument, so never hand it `undefined`
    return this.make(picked === undefined ? [] : picked);
  }

  return this.make(elems).filter(selector);
};
/**
 * Call `fn(index, element)` for every element, with `this` bound to a
 * cheerio wrapper of that element. Iteration stops as soon as `fn` returns
 * exactly `false`. Returns the set for chaining.
 */
var each = exports.each = function(fn) {
  var total = this.length,
      idx = 0,
      node;

  while (idx < total) {
    node = this[idx];
    if (fn.call(this.make(node), idx, node) === false) break;
    ++idx;
  }

  return this;
};
/**
 * Build a plain array from the results of `fn(index, element)`, where `fn`
 * runs with `this` bound to a cheerio wrapper of the element.
 */
var map = exports.map = function(fn) {
  var self = this;
  return _.map(self, function(node, idx) {
    var wrapped = self.make(node);
    return fn.call(wrapped, idx, node);
  });
};
/**
 * Keep only the elements that pass `match`.
 *
 * A string is treated as a selector: an element is kept when it is the
 * first result of selecting within itself. Any other value is called as
 * `match.call(wrapper, index, element)` and its truthiness decides.
 */
var filter = exports.filter = function(match) {
  var make = _.bind(this.make, this),
      predicate;

  if (_.isString(match)) {
    predicate = function(node) {
      return select(match, node)[0] === node;
    };
  } else {
    predicate = function(node, idx) {
      return match.call(make(node), idx, node);
    };
  }

  return make(_.filter(this, predicate));
};
/**
 * Wrap the first element; an empty set is returned as-is.
 */
var first = exports.first = function() {
  if (!this[0]) return this;
  return this.make(this[0]);
};
/**
 * Wrap the last element; an empty set is returned as-is.
 */
var last = exports.last = function() {
  if (!this[0]) return this;
  var tail = this[this.length - 1];
  return this.make(tail);
};
/**
 * Reduce the set to the element at index `i`. Negative indices count back
 * from the end. A position with no element produces an empty set.
 */
var eq = exports.eq = function(i) {
  var index = +i;
  if (index < 0) index = this.length + index;

  var hit = this[index];
  if (hit) return this.make(hit);
  return this.make([]);
};
/**
 * Array-style slice of the set; arguments are forwarded unchanged to
 * `Array.prototype.slice` and the result is wrapped.
 */
var slice = exports.slice = function() {
  var picked = [].slice.apply(this, arguments);
  return this.make(picked);
};

143
node_modules/cheerio/lib/cheerio.js generated vendored Normal file
View File

@@ -0,0 +1,143 @@
/*
Module dependencies
*/
var path = require('path'),
select = require('cheerio-select'),
parse = require('./parse'),
evaluate = parse.evaluate,
updateDOM = parse.update,
_ = require('underscore');
/*
* The API
*/
var api = ['attributes', 'traversing', 'manipulation'];
/*
* A simple way to check for HTML strings or ID strings
*/
var quickExpr = /^(?:[^#<]*(<[\w\W]+>)[^>]*$|#([\w\-]*)$)/;
/**
* Static Methods
*/
var $ = require('./static');
/*
 * Instance of cheerio
 *
 * Constructor / factory: `Cheerio(selector, context, root)`.
 * The checks below run in order and the first one that applies wins:
 *
 *   1. Called without `new`: re-invoked with `new`.
 *   2. Falsy selector: the (empty) instance is returned.
 *   3. `root` given: a string root is parsed first; the root is then stored
 *      on `this._root` through `make(root, this)`.
 *   4. Selector already carries the `cheerio` signature: returned as-is.
 *   5. Selector has a `name` or is an array: treated as DOM node(s) and
 *      copied onto this instance.
 *   6. Selector is an HTML string: parsed, and the children of the parsed
 *      root are wrapped.
 *   7. Otherwise the selector is a query. The context is resolved (falling
 *      back to `this._root`) and the query is run through
 *      `context.parent().find(selector)`. Without any context the empty
 *      instance is returned.
 */
var Cheerio = module.exports = function(selector, context, root) {
// Allow calling without `new`
if (!(this instanceof Cheerio)) return new Cheerio(selector, context, root);
// $(), $(null), $(undefined), $(false)
if (!selector) return this;
if (root) {
// A string root is markup that still has to be parsed
if (typeof root === 'string') root = parse(root);
this._root = this.make(root, this);
}
// $($)
if (selector.cheerio) return selector;
// $(dom)
if (selector.name || Array.isArray(selector))
return this.make(selector, this);
// $(<html>)
if (typeof selector === 'string' && isHtml(selector)) {
return this.make(parse(selector).children);
}
// If we don't have a context, maybe we have a root, from loading
if (!context) {
context = this._root;
} else if (typeof context === 'string') {
if (isHtml(context)) {
// $('li', '<ul>...</ul>')
context = parse(context);
context = this.make(context, this);
} else {
// $('li', 'ul')
// A selector-string context is simply prepended to the query
selector = [context, selector].join(' ');
context = this._root;
}
}
// If we still don't have a context, return
if (!context) return this;
// #id, .class, tag
return context.parent().find(selector);
};
/**
* Inherit from `static`
*/
Cheerio.__proto__ = require('./static');
/*
* Set a signature of the object
*/
Cheerio.prototype.cheerio = '[cheerio object]';
/*
* Cheerio default options
*/
Cheerio.prototype.options = {
ignoreWhitespace: false,
xmlMode: false,
lowerCaseTags: false
};
/*
* Make cheerio an array-like object
*/
Cheerio.prototype.length = 0;
Cheerio.prototype.sort = [].splice;
/*
 * Check if string is HTML
 *
 * Strings of at least three characters that start with `<` and end with
 * `>` are accepted immediately; everything else is decided by `quickExpr`,
 * whose first capture group is only set for markup.
 */
var isHtml = function(str) {
  var size = str.length,
      wrapped = str.charAt(0) === '<' && str.charAt(size - 1) === '>' && size >= 3;

  // Cheap shortcut that avoids running the regex
  if (wrapped) return true;

  var result = quickExpr.exec(str);
  if (result && result[1]) return true;
  return false;
};
/*
 * Make a cheerio object
 *
 * `dom` that is already a cheerio object is returned untouched. Otherwise
 * the node (or array of nodes) is copied, index by index together with a
 * matching `length`, onto `context` or onto a new empty instance.
 */
Cheerio.prototype.make = function(dom, context) {
  if (dom.cheerio) return dom;

  var nodes = Array.isArray(dom) ? dom : [dom],
      target = context || new Cheerio();

  return _.extend(target, nodes, { length: nodes.length });
};
/**
 * Turn a cheerio object into an array
 *
 * Returns a new plain array holding the indexed elements of the set.
 */
Cheerio.prototype.toArray = function() {
  var copy = [].slice.call(this, 0);
  return copy;
};
/**
* Plug in the API
*/
api.forEach(function(mod) {
_.extend(Cheerio.prototype, require('./api/' + mod));
});

97
node_modules/cheerio/lib/parse.js generated vendored Normal file
View File

@@ -0,0 +1,97 @@
/*
Module Dependencies
*/
var htmlparser = require('htmlparser2'),
_ = require('underscore'),
isTag = require('./utils').isTag;
/*
Parser
*/
/**
 * Parse `content` and hang the resulting nodes under a synthetic node of
 * type `root`, which is returned.
 */
exports = module.exports = function(content, options) {
  var nodes = evaluate(content, options);

  // Generic root element
  var root = {
    type: 'root',
    name: 'root',
    parent: null,
    prev: null,
    next: null,
    children: []
  };

  // Attach the parsed nodes to the root and link them up
  update(nodes, root);

  return root;
};
/**
 * Feed `content` through an htmlparser2 `Parser` backed by a `DomHandler`
 * (both receive `options`) and return the handler's DOM after it has been
 * run through `connect`.
 */
var evaluate = exports.evaluate = function(content, options) {
  var domHandler = new htmlparser.DomHandler(options);
  var htmlParser = new htmlparser.Parser(domHandler, options);

  htmlParser.write(content);
  htmlParser.done();

  return connect(domHandler.dom);
};
/**
 * Recursively set `parent`, `prev` and `next` on every node of `dom`.
 * Tag-type nodes additionally get an empty `attribs` object and an empty
 * `children` array when those are missing. Returns `dom`.
 */
var connect = exports.connect = function(dom, parent) {
  var owner = parent || null,
      previous = null;

  _.each(dom, function(node) {
    var tagLike = isTag(node.type);

    // Tags always expose an attribs object
    if (tagLike && node.attribs === undefined) {
      node.attribs = {};
    }

    // Link into the sibling chain
    node.parent = owner;
    node.prev = previous;
    node.next = null;
    if (previous) previous.next = node;

    // Descend, or give childless tags an empty list
    if (node.children) {
      connect(node.children, node);
    } else if (tagLike) {
      node.children = [];
    }

    previous = node;
  });

  return dom;
};
/*
 * Update the dom structure, for one changed layer
 * Much faster than reconnecting
 *
 * Sets `prev`, `next` and `parent` on every node of `arr` (a single node is
 * wrapped in an array first), then installs the list as `parent.children`.
 * Returns `parent`.
 */
var update = exports.update = function(arr, parent) {
  var nodes = Array.isArray(arr) ? arr : [arr],
      owner = parent || null,
      node;

  for (var idx = 0; idx < nodes.length; idx++) {
    node = nodes[idx];
    node.prev = nodes[idx - 1] || null;
    node.next = nodes[idx + 1] || null;
    node.parent = owner;
  }

  parent.children = nodes;
  return parent;
};
// module.exports = $.extend(exports);

121
node_modules/cheerio/lib/render.js generated vendored Normal file
View File

@@ -0,0 +1,121 @@
/*
Module dependencies
*/
var _ = require('underscore');
/*
Boolean Attributes
*/
var rboolean = /^(?:autofocus|autoplay|async|checked|controls|defer|disabled|hidden|loop|multiple|open|readonly|required|scoped|selected)$/i;
/*
 * Format attributes
 *
 * Turns an attribute map into the text that goes inside a start tag.
 * Attributes with a falsy value are written as a bare name when they are
 * boolean attributes (per `rboolean`) or the key `/`; all others are
 * written as name="value". Entries are joined with single spaces.
 */
var formatAttrs = function(attributes) {
  if (!attributes) return '';

  var parts = [];

  for (var name in attributes) {
    var val = attributes[name],
        bare = !val && (rboolean.test(name) || name === '/');

    parts.push(bare ? name : name + '="' + val + '"');
  }

  return parts.join(' ');
};
/*
Self-enclosing tags (stolen from node-htmlparser)
*/
var singleTag = {
area: 1,
base: 1,
basefont: 1,
br: 1,
col: 1,
frame: 1,
hr: 1,
img: 1,
input: 1,
isindex: 1,
link: 1,
meta: 1,
param: 1,
embed: 1,
include: 1,
'yield': 1
};
/*
Tag types from htmlparser
*/
var tagType = {
tag: 1,
script: 1,
link: 1,
style: 1,
template: 1
};
/**
 * Serialize DOM nodes to a markup string.
 *
 * @param dom  a node, an array of nodes, or a cheerio object
 * @param opts optional settings; only `xmlMode` is read here. In xml mode
 *             every tag-type node gets a closing tag, otherwise names
 *             listed in `singleTag` are left unclosed.
 * @return the concatenated markup
 */
var render = module.exports = function(dom, opts) {
  if (!Array.isArray(dom) && !dom.cheerio) dom = [dom];
  opts = opts || {};

  var output = [],
      xmlMode = opts.xmlMode || false,
      // The lookup tables are plain objects, so a bare `table[key]` would
      // also hit inherited members (an element named `constructor` would
      // count as a void tag). Only own keys are real entries.
      owns = Object.prototype.hasOwnProperty;

  _.each(dom, function(elem) {
    var isTagType = owns.call(tagType, elem.type),
        pushVal;

    if (isTagType)
      pushVal = renderTag(elem);
    else if (elem.type === 'directive')
      pushVal = renderDirective(elem);
    else if (elem.type === 'comment')
      pushVal = renderComment(elem);
    else
      pushVal = renderText(elem);

    // Push rendered DOM node
    output.push(pushVal);

    if (elem.children)
      output.push(render(elem.children, opts));

    // Closing tag, unless this is a void element in HTML mode
    if (isTagType && (xmlMode || !owns.call(singleTag, elem.name)))
      output.push('</' + elem.name + '>');
  });

  return output.join('');
};
/**
 * Build the start tag for `elem`: `<name>` or, when the element has at
 * least one attribute, `<name attrs>`.
 */
var renderTag = function(elem) {
  var attrs = elem.attribs,
      attrText = '';

  if (attrs && _.size(attrs)) {
    attrText = ' ' + formatAttrs(attrs);
  }

  return '<' + elem.name + attrText + '>';
};
/**
 * Render a directive node by wrapping its `data` in angle brackets.
 */
var renderDirective = function(elem) {
  var body = elem.data;
  return '<' + body + '>';
};
/**
 * Render a text node: its `data` is emitted unchanged.
 */
var renderText = function(elem) {
  var content = elem.data;
  return content;
};
/**
 * Render a comment node as `<!--data-->`.
 */
var renderComment = function(elem) {
  var body = elem.data;
  return '<!--' + body + '-->';
};
// module.exports = $.extend(exports);

95
node_modules/cheerio/lib/static.js generated vendored Normal file
View File

@@ -0,0 +1,95 @@
/**
* Module dependencies
*/
var select = require('cheerio-select'),
parse = require('./parse'),
render = require('./render'),
decode = require('./utils').decode;
/**
 * $.load(str)
 *
 * Parse `str` once and return a selector function bound to that document.
 * The returned function inherits the static helpers of this module and
 * exposes the parsed root as `_root`; callers may pass their own root as
 * the third argument to override it.
 */
var load = exports.load = function(str, options) {
  var Cheerio = require('./cheerio'),
      parsedRoot = parse(str, options);

  function initialize(selector, context, r) {
    return new Cheerio(selector, context, r || parsedRoot);
  }

  // Add in the static methods
  initialize.__proto__ = exports;

  // Add in the root
  initialize._root = parsedRoot;

  return initialize;
};
/**
 * $.html([selector | dom])
 *
 * With a string argument the string is used as a selector against the
 * loaded root and the matches are rendered; any other truthy argument is
 * rendered directly. Without an argument the children of the loaded root
 * are rendered, or '' when there is no root.
 */
var html = exports.html = function(dom) {
  if (dom) {
    var target = (typeof dom === 'string') ? select(dom, this._root) : dom;
    return render(target);
  }

  if (this._root && this._root.children) {
    return render(this._root.children);
  }

  return '';
};
/**
 * $.text(dom)
 *
 * Concatenate the entity-decoded data of all text nodes found in `elems`
 * and, recursively, in their children. Comment nodes are not descended
 * into. A falsy argument gives ''.
 */
var text = exports.text = function(elems) {
  if (!elems) return '';

  var collected = '',
      total = elems.length,
      node;

  for (var idx = 0; idx < total; idx++) {
    node = elems[idx];

    if (node.type === 'text') {
      collected += decode(node.data);
      continue;
    }

    if (!node.children || node.type === 'comment') continue;

    collected += text(node.children);
  }

  return collected;
};
/**
 * $.root()
 *
 * Wrap the loaded root node by calling the selector function on it.
 */
var root = exports.root = function() {
  var rootNode = this._root;
  return this(rootNode);
};
/**
 * $.contains()
 *
 * True when `container` is a proper ancestor of `contained`.
 */
var contains = exports.contains = function(container, contained) {
  // According to the jQuery API, an element does not "contain" itself
  if (contained === container) return false;

  // Walk up the ancestor chain. The walk ends when a node has no parent,
  // or when a node is its own parent.
  var node = contained;
  while (node && node.parent !== node) {
    node = node.parent;
    if (node === container) return true;
  }

  return false;
};

30
node_modules/cheerio/lib/utils.js generated vendored Normal file
View File

@@ -0,0 +1,30 @@
/**
* Module Dependencies
*/
var entities = require('entities');
/**
* HTML Tags
*/
var tags = { tag: true, script: true, style: true };
/**
* Check if the DOM element is a tag
*
* isTag(type) includes <script> and <style> tags
*/
exports.isTag = function(type) {
if (type.type) type = type.type;
return tags[type] || false;
};
/**
* Expose encode and decode methods from FB55's node-entities library
*
* 0 = XML, 1 = HTML4 and 2 = HTML5
*/
exports.encode = function(str) { return entities.encode(String(str), 0); };
exports.decode = function(str) { return entities.decode(str, 2); };

View File

@@ -0,0 +1,4 @@
support
test
examples
*.sock

View File

@@ -0,0 +1,5 @@
language: node_js
node_js:
- 0.4
- 0.6
- 0.7

View File

@@ -0,0 +1,18 @@
0.0.3 / 2012-05-29
==================
* compatible with node 0.4.x
* added travis support
0.0.2 / 2012-05-27
==================
* Now supports node 0.7.x
* Commented out tests for features that will not be supported
* Down to 19/156 failed tests - thanks to @FB55!
0.0.1 / 2012-05-23
==================
* Initial release

View File

@@ -0,0 +1,7 @@
test:
@./node_modules/mocha/bin/mocha --reporter list
subl:
@subl lib/ test/ package.json index.js
.PHONY: test subl

View File

@@ -0,0 +1,51 @@
# cheerio-select [![Build Status](https://secure.travis-ci.org/MatthewMueller/cheerio-select.png?branch=master)](http://travis-ci.org/MatthewMueller/cheerio-select)
Tiny wrapper around FB55's excellent [CSSselect](https://github.com/FB55/CSSselect) library.
cheerio-select provides a comprehensive test suite based on sizzle's test suite.
> Warning: Currently, not all tests pass, and some sizzle features will not be supported
## Usage
var select = require('cheerio-select'),
parse = require('cheerio').parse,
dom = parse('<ul id = "fruits"><li class = "apple">Apple</li></ul>');
select('#fruits > .apple', dom);
=> [{...}]
## TODO
* Get all the unit tests to pass!
## Run tests
npm install
make test
## License
(The MIT License)
Copyright (c) 2012 Matt Mueller &lt;mattmuelle@gmail.com&gt;
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
'Software'), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View File

@@ -0,0 +1,9 @@
exports = module.exports = require('./lib/select');
/*
Export the version
*/
exports.version = (function() {
var pkg = require('fs').readFileSync(__dirname + '/package.json', 'utf8');
return JSON.parse(pkg).version;
})();

View File

@@ -0,0 +1,37 @@
/*
* Module dependencies
*/
var CSSselect = require('CSSselect'),
isArray = Array.isArray;
/*
 * Select function
 *
 * Normalizes `dom` into a flat array of nodes and lets CSSselect collect
 * every node matching `query`.
 */
exports = module.exports = function(query, dom) {
  var nodes = normalize(dom);
  return CSSselect.iterate(query, nodes);
};
/*
 * Normalize the dom
 *
 * Accepts a cheerio object, an array of nodes or a single node and returns
 * a new array of nodes in which every node of type `root` has been
 * replaced by its children.
 */
var normalize = exports.normalize = function(dom) {
  var list = dom.cheerio ? dom.toArray() : dom;
  if (!isArray(list)) list = [list];

  var flattened = [],
      total = list.length,
      node;

  for (var idx = 0; idx < total; idx++) {
    node = list[idx];

    if (node.type !== 'root') {
      flattened.push(node);
      continue;
    }

    // Synthetic roots are unwrapped
    flattened = flattened.concat(node.children || []);
  }

  return flattened;
};

View File

@@ -0,0 +1,8 @@
language: node_js
node_js:
- 0.4
- 0.6
- 0.8
- 0.9
notifications:
email: false

View File

@@ -0,0 +1,11 @@
Copyright (c) Felix Böhm
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
THIS IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS,
EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@@ -0,0 +1,20 @@
#CSSselect [![Build Status](https://secure.travis-ci.org/fb55/CSSselect.png?branch=master)](http://travis-ci.org/fb55/CSSselect)
##What?
CSSselect is a CSS selector engine. It returns a function that tests whether an element matches a selector; checking needs to happen "from the top", the way browser engines execute queries.
##Why?
Just take the following CSS query: `foo bar baz`. When the element named `baz` has like a billion children, every one of them needs to be checked if they match a query. Three times, to be precise, if you run a CSS query from the start to the end (as e.g. JSDOM does). Yup, that's slow.
This library checks every element once. The more complex the query, the greater the benefit.
##How?
By stacking functions!
##TODO
1. The API needs to be improved
2. Documentation needs to be written

View File

@@ -0,0 +1,35 @@
/*
  DOM adapter for browser nodes: the small set of accessors the selector
  engine needs, expressed in terms of the standard DOM properties
  (nodeType, childNodes, parentElement, attributes, tagName).
*/

// true for element nodes (nodeType 1)
function isElement(elem){
  return elem.nodeType === 1;
}

// child list of the parent, or the falsy parent value when there is none
function getSiblings(elem){
  var parent = getParent(elem);
  return parent && getChildren(parent);
}

function getChildren(elem){
  return elem.childNodes;
}

function getParent(elem){
  return elem.parentElement;
}

// callers are expected to check hasAttrib first; a missing attribute throws
function getAttributeValue(elem, name){
  return elem.attributes[name].value;
}

function hasAttrib(elem, name){
  return name in elem.attributes;
}

// tag name, lower-cased
function getName(elem){
  return elem.tagName.toLowerCase();
}

//https://github.com/ded/qwery/blob/master/pseudos/qwery-pseudos.js#L47-54
// Concatenated text of all descendants of `elem`.
// Element children contribute their textContent / innerText when the host
// provides one, and are walked recursively otherwise; text nodes
// (nodeType 3) contribute their nodeValue. Other node types are ignored.
function getText(elem){
  var str = "",
      childs = getChildren(elem);

  if(!childs) return str;

  for(var i = 0; i < childs.length; i++){
    var el = childs[i];
    if(isElement(el)) str += el.textContent || el.innerText || getText(el);
    else if(el.nodeType === 3) str += el.nodeValue;
  }

  return str;
}

View File

@@ -0,0 +1,736 @@
;(function(global, CSSwhat){
"use strict";
//functions that make porting the library to another DOM easy
// Element-like nodes are those of type "tag", "style" or "script".
function isElement(elem){
  switch(elem.type){
    case "tag":
    case "style":
    case "script":
      return true;
    default:
      return false;
  }
}

// Child list of a node (may be undefined for leaf nodes).
function getChildren(elem){
  var kids = elem.children;
  return kids;
}

// Parent node of a node.
function getParent(elem){
  var owner = elem.parent;
  return owner;
}

// Raw value stored for attribute `name`; requires `elem.attribs` to exist.
function getAttributeValue(elem, name){
  var attrs = elem.attribs;
  return attrs[name];
}

// Whether `name` is a key of the attribute map. When the node has no
// attribute map, that falsy `attribs` value itself is returned.
function hasAttrib(elem, name){
  var attrs = elem.attribs;
  return attrs && name in attrs;
}

// Tag name as stored on the node.
function getName(elem){
  var tagName = elem.name;
  return tagName;
}

// Concatenate the `data` of all non-element descendants, depth first.
function getText(elem){
  var nodes = getChildren(elem);
  if(!nodes) return "";

  var collected = "";
  for(var idx = 0, total = nodes.length; idx < total; idx++){
    var node = nodes[idx];
    collected += isElement(node) ? getText(node) : node.data;
  }

  return collected;
}
/*
pseudo selectors
---
they are available in two forms:
* filters called when the selector
is compiled and return a function
that needs to return next()
* pseudos get called on execution
they need to return a boolean
*/
var filters = {
not: function(next, select){
var func = parse(select);
if(func === falseFunc){
if(next === rootFunc) return trueFunc;
else return next;
}
if(func === trueFunc) return falseFunc;
if(func === rootFunc) return falseFunc;
return function(elem){
if(!func(elem)) return next(elem);
};
},
contains: function(next, text){
if(
(text.charAt(0) === "\"" || text.charAt(0) === "'") &&
text.charAt(0) === text.substr(-1)
){
text = text.slice(1, -1);
}
return function(elem){
if(getText(elem).indexOf(text) !== -1) return next(elem);
};
},
has: function(next, select){
var func = parse(select);
if(func === rootFunc || func === trueFunc) return next;
if(func === falseFunc) return falseFunc;
var proc = function(elem){
var children = getChildren(elem);
if(!children) return;
for(var i = 0, j = children.length; i < j; i++){
if(!isElement(children[i])) continue;
if(func(children[i])) return true;
if(proc(children[i])) return true;
}
};
return function proc(elem){
if(proc(elem)) return next(elem);
};
},
root: function(next){
return function(elem){
if(!getParent(elem)) return next(elem);
};
},
empty: function(next){
return function(elem){
var children = getChildren(elem);
if(!children || children.length === 0) return next(elem);
};
},
parent: function(next){ //:parent is the inverse of :empty
return function(elem){
var children = getChildren(elem);
if(children && children.length !== 0) return next(elem);
};
},
//location specific methods
//first- and last-child methods return as soon as they find another element
"first-child": function(next){
return function(elem){
if(getFirstElement(getSiblings(elem)) === elem) return next(elem);
};
},
"last-child": function(next){
return function(elem){
var siblings = getSiblings(elem);
if(!siblings) return;
for(var i = siblings.length-1; i >= 0; i--){
if(siblings[i] === elem) return next(elem);
if(isElement(siblings[i])) return;
}
};
},
"first-of-type": function(next){
return function(elem){
var siblings = getSiblings(elem);
if(!siblings) return;
for(var i = 0, j = siblings.length; i < j; i++){
if(siblings[i] === elem) return next(elem);
if(getName(siblings[i]) === getName(elem)) return;
}
};
},
"last-of-type": function(next){
return function(elem){
var siblings = getSiblings(elem);
if(!siblings) return;
for(var i = siblings.length-1; i >= 0; i--){
if(siblings[i] === elem) return next(elem);
if(getName(siblings[i]) === getName(elem)) return;
}
};
},
"only-of-type": function(next){
return function(elem){
var siblings = getSiblings(elem);
if(!siblings) return;
for(var i = 0, j = siblings.length; i < j; i++){
if(siblings[i] === elem) continue;
if(getName(siblings[i]) === getName(elem)) return;
}
return next(elem);
};
},
"only-child": function(next){
return function(elem){
var siblings = getSiblings(elem);
if(!siblings) return;
if(siblings.length === 1) return next(elem);
for(var i = 0, j = siblings.length; i < j; i++){
if(isElement(siblings[i]) && siblings[i] !== elem) return;
}
return next(elem);
};
},
"nth-child": function(next, rule){
var func = getNCheck(rule);
if(func === falseFunc) return func;
if(func === trueFunc){
if(next === rootFunc) return func;
else return next;
}
return function(elem){
if(func(getIndex(elem))) return next(elem);
};
},
"nth-last-child": function(next, rule){
var func = getNCheck(rule);
if(func === falseFunc) return func;
if(func === trueFunc){
if(next === rootFunc) return func;
else return next;
}
return function(elem){
var siblings = getSiblings(elem);
if(!siblings) return;
for(var pos = 0, i = siblings.length - 1; i >= 0; i--){
if(siblings[i] === elem){
if(func(pos)) return next(elem);
return;
}
if(isElement(siblings[i])) pos++;
}
};
},
"nth-of-type": function(next, rule){
var func = getNCheck(rule);
if(func === falseFunc) return func;
if(func === trueFunc){
if(next === rootFunc) return func;
else return next;
}
return function(elem){
var siblings = getSiblings(elem);
if(!siblings) return;
for(var pos = 0, i = 0, j = siblings.length; i < j; i++){
if(siblings[i] === elem){
if(func(pos)) return next(elem);
return;
}
if(getName(siblings[i]) === getName(elem)) pos++;
}
};
},
"nth-last-of-type": function(next, rule){
var func = getNCheck(rule);
if(func === falseFunc) return func;
if(func === trueFunc){
if(next === rootFunc) return func;
else return next;
}
return function(elem){
var siblings = getSiblings(elem);
if(!siblings) return;
for(var pos = 0, i = siblings.length-1; i >= 0; i--){
if(siblings[i] === elem){
if(func(pos)) return next(elem);
return;
}
if(getName(siblings[i]) === getName(elem)) pos++;
}
};
},
//forms
//to consider: :target, :enabled
selected: function(next){
return function(elem){
if(hasAttrib(elem, "selected")) return next(elem);
//the first <option> in a <select> is also selected
//TODO this only works for direct descendents
if(getName(getParent(elem)) !== "option") return;
if(getFirstElement(getSiblings(elem)) === elem) return next(elem);
};
},
disabled: function(next){
return function(elem){
if(hasAttrib(elem, "disabled")) return next(elem);
};
},
enabled: function(next){
return function(elem){
if(!hasAttrib(elem, "disabled")) return next(elem);
};
},
checked: function(next){
return function(elem){
if(hasAttrib(elem, "checked")) return next(elem);
};
},
//jQuery extensions
header: function(next){
return function(elem){
var name = getName(elem);
if(
name === "h1" ||
name === "h2" ||
name === "h3" ||
name === "h4" ||
name === "h5" ||
name === "h6"
) return next(elem);
};
},
button: function(next){
return function(elem){
if(
getName(elem) === "button" ||
getName(elem) === "input" &&
hasAttrib(elem, "type") &&
getAttributeValue(elem, "type") === "button"
) return next(elem);
};
},
input: function(next){
return function(elem){
var name = getName(elem);
if(
name === "input" ||
name === "textarea" ||
name === "select" ||
name === "button"
) return next(elem);
};
},
text: function(next){
return function(elem){
if(getName(elem) !== "input") return;
if(
!hasAttrib(elem, "type") ||
getAttributeValue(elem, "type") === "text"
) return next(elem);
};
},
checkbox: getAttribFunc("type", "checkbox"),
file: getAttribFunc("type", "file"),
password: getAttribFunc("type", "password"),
radio: getAttribFunc("type", "radio"),
reset: getAttribFunc("type", "reset"),
image: getAttribFunc("type", "image"),
submit: getAttribFunc("type", "submit")
};
//while filters are precompiled, pseudos get called when they are needed
var pseudos = {};
//helper methods
// Children of the element's parent (the element itself included); when
// there is no parent, the falsy parent value is returned instead.
function getSiblings(elem){
  var owner = getParent(elem);
  if(!owner) return owner;
  return getChildren(owner);
}
/*
  finds the position of an element among its siblings

  Only element siblings are counted, so the result is the number of
  elements that precede `elem`. Returns -1 when the sibling list is
  unavailable or does not contain `elem`.
*/
function getIndex(elem){
  var siblings = getSiblings(elem);
  if(!siblings) return -1;

  var position = 0;
  for(var idx = 0, total = siblings.length; idx < total; idx++){
    var node = siblings[idx];
    if(node === elem) return position;
    if(isElement(node)) position += 1;
  }

  return -1;
}
// First entry of `elems` that is an element; undefined when the list is
// missing or holds no element.
function getFirstElement(elems){
  if(!elems) return;

  var total = elems.length,
      idx = 0;

  while(idx < total){
    if(isElement(elems[idx])) return elems[idx];
    idx++;
  }
}
/*
returns a function that checks if an elements index matches the given rule
highly optimized to return the fastest solution
*/
// matches "an+b" style rules: [1] the "an" part, [2] the sign of b, [3] |b|
var re_nthElement = /^([+\-]?\d*n)?\s*(?:([+\-]?)\s*(\d+))?$/;

// Compile an :nth-* rule ("even", "odd", "an+b") into a predicate over
// zero-based positions. Returns the shared trueFunc / falseFunc when the
// rule matches every / no position, so callers can detect those cases by
// identity. Throws a SyntaxError for rules that cannot be parsed.
function getNCheck(formula){
  var a, b;

  //parse the formula
  //b is lowered by 1 as the rule uses index 1 as the start
  formula = formula.trim().toLowerCase();
  if(formula === "even"){
    a = 2;
    b = -1;
  } else if(formula === "odd"){
    a = 2;
    b = 0;
  }
  else {
    var parts = formula.match(re_nthElement);
    if(!parts){
      //TODO forward rule to error
      throw new SyntaxError("n-th rule couldn't be parsed");
    }
    if(parts[1]){
      a = parseInt(parts[1], 10);
      // "n", "+n" and "-n" carry no digits; an explicit "0n" must stay 0
      if(isNaN(a)){
        if(parts[1].charAt(0) === "-") a = -1;
        else a = 1;
      }
    } else a = 0;
    if(parts[3]) b = parseInt((parts[2] || "") + parts[3], 10) - 1;
    else b = -1;
  }

  //when b <= 0, a*n won't be possible for any matches when a < 0
  //besides, the specification says that no element is matched when a and b are 0
  if(b < 0 && a <= 0) return falseFunc;

  //when b <= 0 and a === 1, they match any element
  if(b < 0 && a === 1) return trueFunc;

  //when a is in the range -1..1, it matches any element (so only b is checked)
  if(a ===-1) return function(pos){ return pos <= b; };
  if(a === 1) return function(pos){ return pos >= b; };
  if(a === 0) return function(pos){ return pos === b; };

  //when a > 0, modulo can be used to check if there is a match
  if(a > 1) return function(pos){
    return pos >= 0 && (pos -= b) >= 0 && (pos % a) === 0;
  };

  //a < -1: positions count DOWN from b in steps of |a|, so a position
  //matches when it lies at or below b and its distance to b is a multiple
  //of the step
  a *= -1; //make a positive
  return function(pos){
    var distance = b - pos;
    return pos >= 0 && distance >= 0 && (distance % a) === 0;
  };
}
// Build a filter factory that, given `next`, checks that attribute `name`
// equals `value` (see checkAttrib).
function getAttribFunc(name, value){
  function bindNext(next){
    return checkAttrib(next, name, value);
  }
  return bindNext;
}
// Returns a test that passes `elem` on to `next` only when the element has
// attribute `name` and its value is strictly equal to `value`; otherwise
// the test returns undefined.
function checkAttrib(next, name, value){
  return function(elem){
    if(!hasAttrib(elem, name)) return;
    if(getAttributeValue(elem, name) !== value) return;
    return next(elem);
  };
}
// The three functions below are sentinels: the rule compilers compare
// against them by identity (e.g. `func === falseFunc`, `next === rootFunc`)
// to recognise trivial selectors, so they must remain distinct function
// objects even though two of them have the same body.

// Starting point of every compiled selector chain (see parse); seeing it as
// `next` means no other check has been stacked yet.
function rootFunc(){
return true;
}
// A compiled selector that is known to accept every element.
function trueFunc(){
return true;
}
// A compiled selector that is known to accept no element.
function falseFunc(){
return false;
}
/*
all available rules
*/
var generalRules = {
__proto__: null,
//tags
tag: function(next, data){
var name = data.name;
return function(elem){
if(getName(elem) === name) return next(elem);
};
},
//traversal
descendant: function(next){
return function(elem){
while(elem = getParent(elem)){
if(next(elem)) return true;
}
};
},
child: function(next){
return function(elem){
var parent = getParent(elem);
if(parent) return next(parent);
};
},
sibling: function(next){
return function(elem){
var siblings = getSiblings(elem);
if(!siblings) return;
for(var i = 0, j = siblings.length; i < j; i++){
if(!isElement(siblings[i])) continue;
if(siblings[i] === elem) return;
if(next(siblings[i])) return true;
}
};
},
adjacent: function(next){
return function(elem){
var siblings = getSiblings(elem),
lastElement;
if(!siblings) return;
for(var i = 0, j = siblings.length; i < j; i++){
if(isElement(siblings[i])){
if(siblings[i] === elem){
if(lastElement) return next(lastElement);
return;
}
lastElement = siblings[i];
}
}
};
},
universal: function(next){
if(next === rootFunc) return trueFunc;
return next;
},
//attributes
attribute: function(next, data){
if(data.ignoreCase){
return noCaseAttributeRules[data.action](next, data.name, data.value, data.ignoreCase);
} else {
return attributeRules[data.action](next, data.name, data.value, data.ignoreCase);
}
},
//pseudos
pseudo: function(next, data){
var name = data.name,
subselect = data.data;
if(name in filters) return filters[name](next, subselect);
else if(name in pseudos){
return function(elem){
if(pseudos[name](elem, subselect)) return next(elem);
};
} else {
throw new SyntaxError("unmatched pseudo-class: " + name);
}
}
};
/*
attribute selectors
*/
var reChars = /[-[\]{}()*+?.,\\^$|#\s]/g; //https://github.com/slevithan/XRegExp/blob/master/src/xregexp.js#L469

// Backslash-escape every character of `str` that is special in a regular
// expression (whitespace included), so it can be embedded literally.
function escapeRe(str){
  return str.replace(reChars, function(ch){
    return "\\" + ch;
  });
}
// Build an attribute rule backed by a regular expression: the escaped
// attribute value is wrapped in the `pre` / `post` pattern fragments, and
// the resulting test forwards to `next` when the element has the
// attribute and its value matches.
function wrapReRule(pre, post){
  return function(next, name, value, ignoreCase){
    var flags = ignoreCase ? "i" : "",
        pattern = pre + escapeRe(value) + post,
        regex = new RegExp(pattern, flags);

    return function(elem){
      if(!hasAttrib(elem, name)) return;
      if(regex.test(getAttributeValue(elem, name))) return next(elem);
    };
  };
}
var noCaseAttributeRules = {
__proto__: null,
exists: function(next, name){
return function(elem){
if(hasAttrib(elem, name)) return next(elem);
};
},
element: wrapReRule("(?:^|\\s)", "(?:$|\\s)"),
equals: wrapReRule("^", "$"),
hyphen: wrapReRule("^", "(?:$|-)"),
start: wrapReRule("^", ""),
end: wrapReRule("", "$"),
any: wrapReRule("", ""),
not: wrapReRule("^(?!^", "$)")
};
var attributeRules = {
__proto__: null,
equals: checkAttrib,
exists: noCaseAttributeRules.exists,
hyphen: noCaseAttributeRules.hyphen,
element: noCaseAttributeRules.element,
start: function(next, name, value){
var len = value.length;
return function(elem){
if(
hasAttrib(elem, name) &&
getAttributeValue(elem, name).substr(0, len) === value
) return next(elem);
};
},
end: function(next, name, value){
var len = -value.length;
return function(elem){
if(
hasAttrib(elem, name) &&
getAttributeValue(elem, name).substr(len) === value
) return next(elem);
};
},
any: function(next, name, value){
return function(elem){
if(
hasAttrib(elem, name) &&
getAttributeValue(elem, name).indexOf(value) >= 0
) return next(elem);
};
},
not: function(next, name, value){
if(value === ""){
return function(elem){
if(hasAttrib(elem, name) && getAttributeValue(elem, name) !== "") return next(elem);
};
}
return function(elem){
if(!hasAttrib(elem, name) || getAttributeValue(elem, name) !== value){
return next(elem);
}
};
}
};
/*
sort the parts of the passed selector,
as there is potential for optimization
*/
//sort weight per token type, read by sortByProcedure: within a run of tokens the lower
//weights are moved to the front; types weighted -1 delimit the runs and keep their position
var procedure = {
__proto__: null,
universal: 5, //should be last so that it can be ignored
tag: 3, //very quick test
attribute: 1, //can be faster than class
pseudo: 0, //can be pretty expensive (especially :has)
//everything else shouldn't be moved
descendant: -1,
child: -1,
sibling: -1,
adjacent: -1
};
/*
	reorder the tokens of one selector by their `procedure` weight
	tokens weighted -1 stay where they are; only the runs between them are sorted
*/
function sortByProcedure(arr){
    //TODO optimize, sort individual attribute selectors
    var sorted = [],
        runStart = 0,
        lastIndex = arr.length - 1,
        byWeight = function(a, b){
            return procedure[a.type] - procedure[b.type];
        },
        idx;

    for(idx = 0; idx <= lastIndex; idx++){
        if(procedure[arr[idx].type] === -1){
            //flush the run collected so far, then keep the boundary token in place
            sorted = sorted.concat(arr.slice(runStart, idx).sort(byWeight));
            sorted.push(arr[idx]);
            runStart = idx + 1;
        } else if(idx === lastIndex){
            //the final run has no closing boundary token
            sorted = sorted.concat(arr.slice(runStart).sort(byWeight));
        }
    }
    return sorted;
}
/*
	compile a selector into a single test function
	every comma separated sub-selector is folded through generalRules;
	results equal to rootFunc or falseFunc are dropped
*/
function parse(selector){
    var compiled = [];

    CSSwhat(selector).forEach(function(tokens){
        var ordered = sortByProcedure(tokens),
            count = ordered.length,
            matcher = rootFunc,
            idx;

        for(idx = 0; idx < count; idx++){
            matcher = generalRules[ordered[idx].type](matcher, ordered[idx]);
            //a rule that can never match ends this sub-selector
            if(matcher === falseFunc) break;
        }
        if(matcher !== rootFunc && matcher !== falseFunc) compiled.push(matcher);
    });

    switch(compiled.length){
        case 0: return falseFunc;
        case 1: return compiled[0];
    }
    if(compiled.indexOf(trueFunc) !== -1) return trueFunc;

    //several sub-selectors: an element passes when any of them accepts it
    return function(elem){
        return compiled.some(function(test){
            return test(elem);
        });
    };
}
/*
the exported interface
*/
//CSSselect(query) returns the compiled test function,
//CSSselect(query, elems) runs it through CSSselect.iterate
var CSSselect = function(query, elems){
    var compiled = typeof query === "function" ? query : parse(query);
    return arguments.length === 1 ? compiled : CSSselect.iterate(compiled, elems);
};
CSSselect.parse = parse;
CSSselect.filters = filters;
CSSselect.pseudos = pseudos;
//collect every element in elems (an array, or a node whose children are used) that passes the query
CSSselect.iterate = function(query, elems){
    var matcher = typeof query === "function" ? query : parse(query);
    if(matcher === falseFunc) return [];
    return iterate(matcher, Array.isArray(elems) ? elems : getChildren(elems));
};
//test a single element against a query
CSSselect.is = function(elem, query){
    var matcher = typeof query === "function" ? query : parse(query);
    return matcher(elem);
};
/*
	walk elems and their descendants depth-first and return, in document order,
	every element accepted by query; nodes rejected by isElement are skipped
*/
function iterate(query, elems){
    var result = [];

    //push into one shared array instead of re-copying the result with concat for every element
    (function walk(nodes){
        var node, children;
        for(var i = 0, j = nodes.length; i < j; i++){
            node = nodes[i];
            if(!isElement(node)) continue;
            if(query(node)) result.push(node);
            children = getChildren(node);
            if(children) walk(children);
        }
    })(elems);

    return result;
}
/*
export CSSselect
*/
if(typeof module === "undefined" || !("exports" in module)){
    //no CommonJS module object: register with an AMD loader when present and always set the global
    if(typeof define === "function" && define.amd){
        define("CSSselect", function(){
            return CSSselect;
        });
    }
    global.CSSselect = CSSselect;
} else {
    module.exports = CSSselect;
}
})(
typeof window === "object" ? window : this,
typeof CSSwhat === "undefined" ? require("CSSwhat") : CSSwhat
);

View File

@@ -0,0 +1,5 @@
language: node_js
node_js:
- 0.4
- 0.6
- 0.7

View File

@@ -0,0 +1,11 @@
Copyright (c) Felix Böhm
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
THIS IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS,
EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@@ -0,0 +1,136 @@
;(function(global){ "use strict";
//regexps
var re_name = /^(?:\\.|[\w\-\u00c0-\uFFFF])+/,
    re_cleanSelector = /([^\\])\s*([>~+]|$)\s*/g,
    re_nthElement = /^([+\-]?\d*n)?\s*([+\-])?\s*(\d)?$/,
    //a backslash followed by six hexadecimal digits (a code point) or by any single character
    re_escapedCss = /\\([\da-fA-F]{6}|.)/g,
    //a single character that is not a digit, i.e. an escaped literal
    re_nonNumeric = /^\D$/,
    re_attr = /^\s*((?:\\.|[\w\u00c0-\uFFFF\-])+)\s*(?:(\S?)=\s*(?:(['"])(.*?)\3|(#?(?:\\.|[\w\u00c0-\uFFFF\-])*)|)|)\s*(i)?\]/; //https://github.com/jquery/sizzle/blob/master/sizzle.js#L374

//operator in front of "=" in an attribute selector -> action name
var actionTypes = {
    __proto__: null,
    "undefined": "exists",
    "": "equals",
    "~": "element",
    "^": "start",
    "$": "end",
    "*": "any",
    "!": "not",
    "|": "hyphen"
};

//single characters that are a token of their own
var simpleSelectors = {
    __proto__: null,
    ">": "child",
    "~": "sibling",
    "+": "adjacent",
    "*": "universal"
};

//shorthand prefixes, mapped to [attribute name, action]
var attribSelectors = {
    __proto__: null,
    "#": ["id", "equals"],
    ".": ["class", "element"]
};

/*
	resolve CSS escape sequences in str
	"\" + six hex digits becomes the character with that code point,
	"\" + any other single character becomes that character
*/
function unescapeCSS(str){
    //based on http://mathiasbynens.be/notes/css-escapes
    //TODO support short sequences (/\\[\da-f]{1,5} /i)
    return str.replace(re_escapedCss, function(m, s){
        if (re_nonNumeric.test(s)) return s;

        //CSS escapes are hexadecimal
        var codePoint = parseInt(s, 16);

        //beyond the Unicode range: use the replacement character
        if (codePoint > 0x10FFFF) return "\uFFFD";
        if (codePoint <= 0xFFFF) return String.fromCharCode(codePoint);

        //supplementary planes need a surrogate pair
        codePoint -= 0x10000;
        return String.fromCharCode(0xD800 + (codePoint >> 10), 0xDC00 + (codePoint & 0x3FF));
    });
}
/*
	selector starts with "(": return the index right after the parenthesis that
	closes it, or the scan's end position when it is never closed
*/
function getClosingPos(selector){
    var depth = 1,
        index = 1,
        total = selector.length,
        chr;

    while(depth > 0 && index < total){
        chr = selector.charAt(index);
        if(chr === "(") depth += 1;
        else if(chr === ")") depth -= 1;
        index += 1;
    }
    return index;
}
/*
	tokenize a selector
	returns one array of tokens per comma separated sub-selector; a token is
	{type: "tag", name}, {type: "attribute", name, action, value, ignoreCase},
	{type: "pseudo", name, data} or {type: <combinator or "universal">}
	throws an Error when the selector cannot be tokenized
*/
function parse(selector){
    //drop leading whitespace and the whitespace around combinators and at the end
    selector = (selector + "").trimLeft().replace(re_cleanSelector, "$1$2");

    var subselects = [],
        tokens = [],
        data, firstChar, name, pos;

    //consume a name from the front of the selector and return it unescaped
    function getName(){
        var match = selector.match(re_name);
        if(match === null){
            //"#", "." or ":" without a name after it
            throw new Error("Expected a name, but found:" + selector);
        }
        selector = selector.substr(match[0].length);
        return unescapeCSS(match[0]);
    }

    while(selector !== ""){
        if(re_name.test(selector)){
            tokens.push({type: "tag", name: getName().toLowerCase()});
        } else if(/^\s/.test(selector)){
            tokens.push({type: "descendant"});
            selector = selector.trimLeft();
        } else {
            firstChar = selector.charAt(0);
            selector = selector.substr(1);

            if(firstChar in simpleSelectors){
                tokens.push({type: simpleSelectors[firstChar]});
            } else if(firstChar in attribSelectors){
                tokens.push({
                    type: "attribute",
                    name: attribSelectors[firstChar][0],
                    action: attribSelectors[firstChar][1],
                    value: getName(),
                    ignoreCase: false
                });
            } else if(firstChar === "["){
                data = selector.match(re_attr);
                if(data === null){
                    //e.g. a missing "]" or an unterminated quote
                    throw new Error("Malformed attribute selector:[" + selector);
                }
                selector = selector.substr(data[0].length);
                tokens.push({
                    type: "attribute",
                    name: unescapeCSS(data[1]),
                    action: actionTypes[data[2]],
                    value: unescapeCSS(data[4] || data[5] || ""),
                    ignoreCase: !!data[6]
                });
            } else if(firstChar === ":"){
                //if(selector.charAt(0) === ":"){} //TODO pseudo-element
                name = getName();
                data = "";
                if(selector.charAt(0) === "("){
                    //the argument is everything between the matching parentheses
                    pos = getClosingPos(selector);
                    data = selector.substr(1, pos - 2);
                    selector = selector.substr(pos);
                }
                tokens.push({type: "pseudo", name: name, data: data});
            } else if(firstChar === ","){
                subselects.push(tokens);
                tokens = [];
            } else {
                //otherwise, the parser needs to throw or it would enter an infinite loop
                throw new Error("Unmatched selector:" + firstChar + selector);
            }
        }
    }
    subselects.push(tokens);
    return subselects;
}
if(typeof module === "undefined" || !("exports" in module)){
    //no CommonJS module object: register with an AMD loader when present and always set the global
    if(typeof define === "function" && define.amd){
        define("CSSwhat", function(){
            return parse;
        });
    }
    global.CSSwhat = parse;
} else {
    module.exports = parse;
}
})(typeof window === "object" ? window : this);

View File

@@ -0,0 +1,32 @@
{
"author": {
"name": "Felix Böhm",
"email": "me@feedic.com",
"url": "http://feedic.com"
},
"name": "CSSwhat",
"description": "a CSS selector parser",
"version": "0.1.1",
"repository": {
"url": "https://github.com/FB55/CSSwhat"
},
"main": "./index.js",
"scripts": {
"test": "node tests/test.js"
},
"dependencies": {},
"devDependencies": {},
"optionalDependencies": {},
"engines": {
"node": "*"
},
"license": "BSD-like",
"readme": "#CSSwhat [![Build Status](https://secure.travis-ci.org/FB55/CSSwhat.png?branch=master)](http://travis-ci.org/FB55/CSSwhat)\n\na CSS selector parser\n\n__// TODO__",
"readmeFilename": "readme.md",
"_id": "CSSwhat@0.1.1",
"dist": {
"shasum": "489865be1fe831c4a9f5be82cb0ea2843605d718"
},
"_from": "CSSwhat@>= 0.1",
"_resolved": "https://registry.npmjs.org/CSSwhat/-/CSSwhat-0.1.1.tgz"
}

View File

@@ -0,0 +1,5 @@
# CSSwhat [![Build Status](https://secure.travis-ci.org/FB55/CSSwhat.png?branch=master)](http://travis-ci.org/FB55/CSSwhat)
a CSS selector parser
__// TODO__

View File

@@ -0,0 +1,42 @@
var deepEquals = require("assert").deepEqual,
CSSwhat = require("../");
//fixtures: [selector, expected result of CSSwhat(selector), message]
//the expected result holds one array of tokens per comma separated sub-selector
var tests = [
["div", [ [ { type: 'tag', name: 'div' } ] ], "simple tag"],
["*", [ [ { type: 'universal' } ] ], "universal"],
//traversal
["div div", [ [ { type: 'tag', name: 'div' },
{ type: 'descendant' },
{ type: 'tag', name: 'div' } ] ], "descendant"],
["div\t \n \tdiv", [ [ { type: 'tag', name: 'div' },
{ type: 'descendant' },
{ type: 'tag', name: 'div' } ] ], "descendant /w whitespace"],
["div + div", [ [ { type: 'tag', name: 'div' },
{ type: 'adjacent' },
{ type: 'tag', name: 'div' } ] ], "adjacent"],
["div ~ div", [ [ { type: 'tag', name: 'div' },
{ type: 'sibling' },
{ type: 'tag', name: 'div' } ] ], "sibling"],
//Escaped whitespace
["#\\ > a ", [ [ { type: 'attribute', action: 'equals', name: 'id', value: ' ', ignoreCase: false }, { type: 'child' }, { type: 'tag', name: 'a' } ] ], "Space between escaped space and combinator" ],
[".\\ ", [ [ { type: 'attribute', name: 'class', action: 'element', value: ' ', ignoreCase: false } ] ], "Space after escaped space" ],
//attributes
["[name^='foo[']",[[{"type":"attribute","name":"name","action":"start","value":"foo[","ignoreCase":false}]],"escaped attribute"],
["[name^='foo[bar]']",[[{"type":"attribute","name":"name","action":"start","value":"foo[bar]","ignoreCase":false}]],"escaped attribute"],
["[name$='[bar]']",[[{"type":"attribute","name":"name","action":"end","value":"[bar]","ignoreCase":false}]],"escaped attribute"],
["[href *= 'google']",[[{"type":"attribute","name":"href","action":"any","value":"google","ignoreCase":false}]],"escaped attribute"],
["[name=foo\\.baz]",[[{"type":"attribute","name":"name","action":"equals","value":"foo.baz","ignoreCase":false}]],"escaped attribute"],
["[name=foo\\[bar\\]]",[[{"type":"attribute","name":"name","action":"equals","value":"foo[bar]","ignoreCase":false}]],"escaped attribute"],
["[xml\\:test]",[[{"type":"attribute","name":"xml:test","action":"exists","value":"","ignoreCase":false}]],"escaped attribute"]
//TODO
];
//parse every fixture's selector and compare the result with the expected tokens
tests.forEach(function(test){
    var selector = test[0],
        expected = test[1],
        message = test[2];

    //keep the fixture untouched and name the selector on failure (several fixtures share one message)
    deepEquals(CSSwhat(selector), expected, message + " (" + selector + ")");
    console.log(message, "passed");
});

View File

@@ -0,0 +1,40 @@
{
"name": "CSSselect",
"version": "0.3.1",
"description": "a rtl CSS selector engine",
"author": {
"name": "Felix Boehm",
"email": "me@feedic.com"
},
"keywords": [
"css",
"selector"
],
"main": "index.js",
"engine": "",
"repository": {
"type": "git",
"url": "git://github.com/fb55/cssselect.git"
},
"dependencies": {
"CSSwhat": ">= 0.1"
},
"devDependencies": {
"htmlparser2": ">= 2.2.8",
"cheerio-soupselect": "*",
"mocha": "*",
"expect.js": "*"
},
"scripts": {
"test": "mocha -u exports -R list tests/qwery tests/nwmatcher/scotch.js"
},
"license": "BSD-like",
"readme": "#CSSselect [![Build Status](https://secure.travis-ci.org/fb55/CSSselect.png?branch=master)](http://travis-ci.org/fb55/CSSselect)\n\n##What?\n\nCSSselect is CSS selector engine. It returns a function that tests elements if they match a selector - checking needs to happen \"from the top\", like browser engines execute queries.\n\n##Why?\n\nJust take the following CSS query: `foo bar baz`. When the element named `baz` has like a billion children, every one of them needs to be checked if they match a query. Three times, to be precise, if you run a CSS query from the start to the end (as e.g. JSDOM does). Yup, that's slow.\n\nThis library checks every element once. The more complex the query, the greater the benefit.\n\n##How?\n\nBy stacking functions!\n\n##TODO\n\n1. The API needs to be improved\n2. Documentation needs to be written",
"readmeFilename": "README.md",
"_id": "CSSselect@0.3.1",
"dist": {
"shasum": "ad91c2821658320c5047ba899201a236922c42f9"
},
"_from": "CSSselect@0.x",
"_resolved": "https://registry.npmjs.org/CSSselect/-/CSSselect-0.3.1.tgz"
}

View File

@@ -0,0 +1,10 @@
//micro-benchmark: time compiling a selector and running the compiled query
var CSSselect = require("../"),
    ben = require("ben"),
    testString = "doo, *#foo > elem.bar[class$=bAz i]:not([ id *= \"2\" ])",
    helper = require("./helper.js"),
    parse = require("../"),
    dom = helper.getDefaultDom();

function compileSelector(){
    CSSselect(testString);
}

function runQuery(){
    CSSselect.iterate(testString, dom);
}

console.log("Parsing took:", ben(1e5, compileSelector));

//from here on testString holds the compiled query instead of the selector text
testString = parse(testString);

console.log("Executing took:", ben(1e6, runQuery) * 1e3);

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,27 @@
var htmlparser2 = require("htmlparser2"),
Parser = htmlparser2.Parser,
Handler = htmlparser2.DomHandler,
CSSselect = require("../");
module.exports = {
CSSselect: CSSselect,
getFile: function(name){
return module.exports.getDOM(
require("fs").readFileSync(__dirname + "/docs/" + name).toString()
);
},
getDOM: function(data){
var h = new Handler({refParent: true, ignoreWhitespace: true}),
p = new Parser(h);
p.write(data);
p.end();
return h.dom;
},
getDefaultDom: function(){
return module.exports.getDOM(
"<elem id=foo><elem class='bar baz'><tag class='boom'> This is some simple text </tag></elem></elem>"
);
}
};

View File

@@ -0,0 +1 @@
<html><body>You are being <a href="https://raw.github.com/dperini/nwmatcher/master/LICENSE">redirected</a>.</body></html>

View File

@@ -0,0 +1,359 @@
/*
taken from https://github.com/dperini/nwmatcher/blob/master/test/scotch/test.js
*/
"use strict";
var expect = require("expect.js"),
DomUtils = require("htmlparser2").DomUtils,
helper = require("../helper.js"),
document = helper.getDOM(require("fs").readFileSync(__dirname + "/test.html")+""),
CSSselect = helper.CSSselect;
//Prototype's `$` function
//with one argument: look a string up as an id in `document`, return anything else unchanged
//with any other number of arguments: return an array with each argument resolved the same way
function getById(element){
    if(arguments.length !== 1){
        return Array.prototype.map.call(arguments, function(item){
            return getById(item);
        });
    }
    return typeof element === "string" ? DomUtils.getElementById(element, document) : element;
}
//thin wrappers mapping the assertion names used by the tests below onto expect.js
//note: `msg` is accepted by every wrapper but never passed on
//loose deep equality
function assertEquivalent(a, b, msg){
expect(a).to.be.eql(b);
}
//strict (===) equality
function assertEqual(a, b, msg){
expect(a).to.be(b);
}
//truthy
function assert(a, msg){
expect(a).to.be.ok();
}
//falsy
function refute(a, msg){
expect(a).to.not.be.ok();
}
//NWMatcher methods
//select(query) searches the whole document; a string context is resolved with select() first
var select = function(query, doc){
    var context;
    if(doc === undefined){
        context = document;
    } else if(typeof doc === "string"){
        context = select(doc);
    } else {
        context = doc;
    }
    return CSSselect.iterate(query, context);
}, match = CSSselect.is;
//The tests...
//mocha "exports" interface: nested objects are suites, functions are test cases
//blocks that are commented out are cases of the original suite that are not run here
module.exports = {
//type, id and class selectors, alone and combined into compound selectors
"Basic Selectors": {
/*
"*": function(){
//Universal selector
var results = DomUtils.getElementsByTagName("*", document);
assertEquivalent(select("*"), results, "Comment nodes should be ignored.");
},
*/
"E": function(){
//Type selector
var results = [], index = 0, nodes = DomUtils.getElementsByTagName("li", document);
//copy nodes into results; the loop stops on the first undefined entry, which is then dropped
while((results[index] = nodes[index++])){}
results.length--;
assertEquivalent(select("li"), results);
assertEqual(select("strong", getById("fixtures"))[0], getById("strong"));
assertEquivalent(select("nonexistent"), []);
},
"#id": function(){
//ID selector
assertEqual(select("#fixtures")[0], getById("fixtures"));
assertEquivalent(select("nonexistent"), []);
assertEqual(select("#troubleForm")[0], getById("troubleForm"));
},
".class": function(){
//Class selector
assertEquivalent(select(".first"), getById('p', 'link_1', 'item_1'));
assertEquivalent(select(".second"), []);
},
"E#id": function(){
assertEqual(select("strong#strong")[0], getById("strong"));
assertEquivalent(select("p#strong"), []);
},
"E.class": function(){
var secondLink = getById("link_2");
assertEquivalent(select('a.internal'), getById('link_1', 'link_2'));
assertEqual(select('a.internal.highlight')[0], secondLink);
assertEqual(select('a.highlight.internal')[0], secondLink);
assertEquivalent(select('a.highlight.internal.nonexistent'), []);
},
"#id.class": function(){
var secondLink = getById('link_2');
assertEqual(select('#link_2.internal')[0], secondLink);
assertEqual(select('.internal#link_2')[0], secondLink);
assertEqual(select('#link_2.internal.highlight')[0], secondLink);
assertEquivalent(select('#link_2.internal.nonexistent'), []);
},
"E#id.class": function(){
var secondLink = getById('link_2');
assertEqual(select('a#link_2.internal')[0], secondLink);
assertEqual(select('a.internal#link_2')[0], secondLink);
assertEqual(select('li#item_1.first')[0], getById("item_1"));
assertEquivalent(select('li#item_1.nonexistent'), []);
assertEquivalent(select('li#item_1.first.nonexistent'), []);
}
},
//presence, equality, word, prefix, suffix and substring attribute operators
"Attribute Selectors": {
"[foo]": function(){
var body = DomUtils.getElementsByTagName("body", document, true, 1)[0];
assertEquivalent(select('[href]', body), select('a[href]', body));
assertEquivalent(select('[class~=internal]'), select('a[class~="internal"]'));
assertEquivalent(select('[id]'), select('*[id]'));
assertEquivalent(select('[type=radio]'), getById('checked_radio', 'unchecked_radio'));
assertEquivalent(select('[type=checkbox]'), select('*[type=checkbox]'));
assertEquivalent(select('[title]'), getById('with_title', 'commaParent'));
assertEquivalent(select('#troubleForm [type=radio]'), select('#troubleForm *[type=radio]'));
assertEquivalent(select('#troubleForm [type]'), select('#troubleForm *[type]'));
},
"E[foo]": function(){
assertEquivalent(select('h1[class]'), select('#fixtures h1'), "h1[class]");
//assertEquivalent(select('h1[CLASS]'), select('#fixtures h1'), "h1[CLASS]");
assertEqual(select('li#item_3[class]')[0], getById('item_3'), "li#item_3[class]");
assertEquivalent(select('#troubleForm2 input[name="brackets[5][]"]'), getById('chk_1', 'chk_2'));
//Brackets in attribute value
assertEqual(select('#troubleForm2 input[name="brackets[5][]"]:checked')[0], getById('chk_1'));
//Space in attribute value
assertEqual(select('cite[title="hello world!"]')[0], getById('with_title'));
/*
//Namespaced attributes
assertEquivalent(select('[xml:lang]'), [document, getById("item_3")]);
assertEquivalent(select('*[xml:lang]'), [document, getById("item_3")]);
*/
},
'E[foo="bar"]': function(){
assertEquivalent(select('a[href="#"]'), getById('link_1', 'link_2', 'link_3'));
/*this.assertThrowsException(/SYNTAX_ERR/, function(){
select('a[href=#]');
});*/
assertEqual(select('#troubleForm2 input[name="brackets[5][]"][value="2"]')[0], getById('chk_2'));
},
'E[foo~="bar"]': function(){
assertEquivalent(select('a[class~="internal"]'), getById('link_1', 'link_2'), "a[class~=\"internal\"]");
assertEquivalent(select('a[class~=internal]'), getById('link_1', 'link_2'), "a[class~=internal]");
assertEqual(select('a[class~=external][href="#"]')[0], getById('link_3'), 'a[class~=external][href="#"]');
},
/*
'E[foo|="en"]': function(){
assertEqual(select('*[xml:lang|="es"]')[0], getById('item_3'));
assertEqual(select('*[xml:lang|="ES"]')[0], getById('item_3'));
},
*/
'E[foo^="bar"]': function(){
assertEquivalent(select('div[class^=bro]'), getById('father', 'uncle'), 'matching beginning of string');
assertEquivalent(select('#level1 *[id^="level2_"]'), getById('level2_1', 'level2_2', 'level2_3'));
assertEquivalent(select('#level1 *[id^=level2_]'), getById('level2_1', 'level2_2', 'level2_3'));
},
'E[foo$="bar"]': function(){
assertEquivalent(select('div[class$=men]'), getById('father', 'uncle'), 'matching end of string');
assertEquivalent(select('#level1 *[id$="_1"]'), getById('level2_1', 'level3_1'));
assertEquivalent(select('#level1 *[id$=_1]'), getById('level2_1', 'level3_1'));
},
'E[foo*="bar"]': function(){
assertEquivalent(select('div[class*="ers m"]'), getById('father', 'uncle'), 'matching substring');
assertEquivalent(select('#level1 *[id*="2"]'), getById('level2_1', 'level3_2', 'level2_2', 'level2_3'));
/*this.assertThrowsException(/SYNTAX_ERR/, function(){
select('#level1 *[id*=2]');
});*/
}
// *** these should throw SYNTAX_ERR ***
/*'E[id=-1]': function(){
this.assertThrowsException(/SYNTAX_ERR/, function(){
select('#level1 *[id=-1]');
});
},
'E[class=-45deg]': function(){
this.assertThrowsException(/SYNTAX_ERR/, function(){
select('#level1 *[class=-45deg]');
});
},
'E[class=8mm]': function(){
this.assertThrowsException(/SYNTAX_ERR/, function(){
select('#level1 *[class=8mm]');
});
}*/
},
//pseudo-classes that depend on an element's position among its siblings
"Structural pseudo-classes": {
"E:first-child": function(){
assertEqual(select('#level1>*:first-child')[0], getById('level2_1'));
assertEquivalent(select('#level1 *:first-child'), getById('level2_1', 'level3_1', 'level_only_child'));
assertEquivalent(select('#level1>div:first-child'), []);
assertEquivalent(select('#level1 span:first-child'), getById('level2_1', 'level3_1'));
assertEquivalent(select('#level1:first-child'), []);
},
"E:last-child": function(){
assertEqual(select('#level1>*:last-child')[0], getById('level2_3'));
assertEquivalent(select('#level1 *:last-child'), getById('level3_2', 'level_only_child', 'level2_3'));
assertEqual(select('#level1>div:last-child')[0], getById('level2_3'));
assertEqual(select('#level1 div:last-child')[0], getById('level2_3'));
assertEquivalent(select('#level1>span:last-child'), []);
},
"E:nth-child(n)": function(){
assertEqual(select('#p *:nth-child(3)')[0], getById('link_2'));
assertEqual(select('#p a:nth-child(3)')[0], getById('link_2'), 'nth-child');
assertEquivalent(select('#list > li:nth-child(n+2)'), getById('item_2', 'item_3'));
assertEquivalent(select('#list > li:nth-child(-n+2)'), getById('item_1', 'item_2'));
},
"E:nth-of-type(n)": function(){
assertEqual(select('#p a:nth-of-type(2)')[0], getById('link_2'), 'nth-of-type');
assertEqual(select('#p a:nth-of-type(1)')[0], getById('link_1'), 'nth-of-type');
},
"E:nth-last-of-type(n)": function(){
assertEqual(select('#p a:nth-last-of-type(1)')[0], getById('link_2'), 'nth-last-of-type');
},
"E:first-of-type": function(){
assertEqual(select('#p a:first-of-type')[0], getById('link_1'), 'first-of-type');
},
"E:last-of-type": function(){
assertEqual(select('#p a:last-of-type')[0], getById('link_2'), 'last-of-type');
},
"E:only-child": function(){
assertEqual(select('#level1 *:only-child')[0], getById('level_only_child'));
//Shouldn't return anything
assertEquivalent(select('#level1>*:only-child'), []);
assertEquivalent(select('#level1:only-child'), []);
assertEquivalent(select('#level2_2 :only-child:not(:last-child)'), []);
assertEquivalent(select('#level2_2 :only-child:not(:first-child)'), []);
}/*,
"E:empty": function(){
getById('level3_1').children = [];
assertEquivalent(select('#level1 *:empty'), getById('level3_1', 'level3_2', 'level2_3'), '#level1 *:empty');
assertEquivalent(select('#level_only_child:empty'), [], 'newlines count as content!');
//Shouldn't return anything
assertEquivalent(select('span:empty > *'), []);
}*/
},
"E:not(s)": function(){
//Negation pseudo-class
assertEquivalent(select('a:not([href="#"])'), []);
assertEquivalent(select('div.brothers:not(.brothers)'), []);
assertEquivalent(select('a[class~=external]:not([href="#"])'), [], 'a[class~=external][href!="#"]');
assertEqual(select('#p a:not(:first-of-type)')[0], getById('link_2'), 'first-of-type');
assertEqual(select('#p a:not(:last-of-type)')[0], getById('link_1'), 'last-of-type');
assertEqual(select('#p a:not(:nth-of-type(1))')[0], getById('link_2'), 'nth-of-type');
assertEqual(select('#p a:not(:nth-last-of-type(1))')[0], getById('link_1'), 'nth-last-of-type');
assertEqual(select('#p a:not([rel~=nofollow])')[0], getById('link_2'), 'attribute 1');
assertEqual(select('#p a:not([rel^=external])')[0], getById('link_2'), 'attribute 2');
assertEqual(select('#p a:not([rel$=nofollow])')[0], getById('link_2'), 'attribute 3');
assertEqual(select('#p a:not([rel$="nofollow"]) > em')[0], getById('em'), 'attribute 4');
assertEqual(select('#list li:not(#item_1):not(#item_3)')[0], getById('item_2'), 'adjacent :not clauses');
assertEqual(select('#grandfather > div:not(#uncle) #son')[0], getById('son'));
assertEqual(select('#p a:not([rel$="nofollow"]) em')[0], getById('em'), 'attribute 4 + all descendants');
assertEqual(select('#p a:not([rel$="nofollow"])>em')[0], getById('em'), 'attribute 4 (without whitespace)');
},
//:disabled and :checked on the form controls of the fixture
"UI element states pseudo-classes": {
"E:disabled": function(){
assertEqual(select('#troubleForm > p > *:disabled')[0], getById('disabled_text_field'));
},
"E:checked": function(){
assertEquivalent(select('#troubleForm *:checked'), getById('checked_box', 'checked_radio'));
}
},
//descendant, adjacent sibling, child and general sibling combinators
"Combinators": {
"E F": function(){
//Descendant
assertEquivalent(select('#fixtures a *'), getById('em2', 'em', 'span'));
assertEqual(select('div#fixtures p')[0], getById("p"));
},
"E + F": function(){
//Adjacent sibling
assertEqual(select('div.brothers + div.brothers')[0], getById("uncle"));
assertEqual(select('div.brothers + div')[0], getById('uncle'));
assertEqual(select('#level2_1+span')[0], getById('level2_2'));
assertEqual(select('#level2_1 + span')[0], getById('level2_2'));
assertEqual(select('#level2_1 + *')[0], getById('level2_2'));
assertEquivalent(select('#level2_2 + span'), []);
assertEqual(select('#level3_1 + span')[0], getById('level3_2'));
assertEqual(select('#level3_1 + *')[0], getById('level3_2'));
assertEquivalent(select('#level3_2 + *'), []);
assertEquivalent(select('#level3_1 + em'), []);
},
"E > F": function(){
//Child
assertEquivalent(select('p.first > a'), getById('link_1', 'link_2'));
assertEquivalent(select('div#grandfather > div'), getById('father', 'uncle'));
assertEquivalent(select('#level1>span'), getById('level2_1', 'level2_2'));
assertEquivalent(select('#level1 > span'), getById('level2_1', 'level2_2'));
assertEquivalent(select('#level2_1 > *'), getById('level3_1', 'level3_2'));
assertEquivalent(select('div > #nonexistent'), []);
},
"E ~ F": function(){
//General sibling
assertEqual(select('h1 ~ ul')[0], getById('list'));
assertEquivalent(select('#level2_2 ~ span'), []);
assertEquivalent(select('#level3_2 ~ *'), []);
assertEquivalent(select('#level3_1 ~ em'), []);
assertEquivalent(select('div ~ #level3_2'), []);
assertEquivalent(select('div ~ #level2_3'), []);
assertEqual(select('#level2_1 ~ span')[0], getById('level2_2'));
assertEquivalent(select('#level2_1 ~ *'), getById('level2_2', 'level2_3'));
assertEqual(select('#level3_1 ~ #level3_2')[0], getById('level3_2'));
assertEqual(select('span ~ #level3_2')[0], getById('level3_2'));
}
},
//`match` is CSSselect.is: test one element against a selector
"NW.Dom.match": function(){
var element = getById('dupL1');
//Assertions
assert(match(element, 'span'));
assert(match(element, "span#dupL1"));
assert(match(element, "div > span"), "child combinator");
assert(match(element, "#dupContainer span"), "descendant combinator");
assert(match(element, "#dupL1"), "ID only");
assert(match(element, "span.span_foo"), "class name 1");
assert(match(element, "span.span_bar"), "class name 2");
assert(match(element, "span:first-child"), "first-child pseudoclass");
//Refutations
refute(match(element, "span.span_wtf"), "bogus class name");
refute(match(element, "#dupL2"), "different ID");
refute(match(element, "div"), "different tag name");
refute(match(element, "span span"), "different ancestry");
refute(match(element, "span > span"), "different parent");
refute(match(element, "span:nth-child(5)"), "different pseudoclass");
//Misc.
refute(match(getById('link_2'), 'a[rel^=external]'));
assert(match(getById('link_1'), 'a[rel^=external]'));
assert(match(getById('link_1'), 'a[rel^="external"]'));
assert(match(getById('link_1'), "a[rel^='external']"));
},
//pairs of selectors that have to return the same elements
"Equivalent Selectors": function(){
assertEquivalent(select('div.brothers'), select('div[class~=brothers]'));
assertEquivalent(select('div.brothers'), select('div[class~=brothers].brothers'));
assertEquivalent(select('div:not(.brothers)'), select('div:not([class~=brothers])'));
assertEquivalent(select('li ~ li'), select('li:not(:first-child)'));
assertEquivalent(select('ul > li'), select('ul > li:nth-child(n)'));
assertEquivalent(select('ul > li:nth-child(even)'), select('ul > li:nth-child(2n)'));
assertEquivalent(select('ul > li:nth-child(odd)'), select('ul > li:nth-child(2n+1)'));
assertEquivalent(select('ul > li:first-child'), select('ul > li:nth-child(1)'));
assertEquivalent(select('ul > li:last-child'), select('ul > li:nth-last-child(1)'));
/* Opera 10 does not accept values > 128 as a parameter to :nth-child
See <http://operawiki.info/ArtificialLimits> */
assertEquivalent(select('ul > li:nth-child(n-128)'), select('ul > li'));
assertEquivalent(select('ul>li'), select('ul > li'));
assertEquivalent(select('#p a:not([rel$="nofollow"])>em'), select('#p a:not([rel$="nofollow"]) > em'));
},
//comma separated selector lists; the commas inside the quoted attribute values must not split them
"Multiple Selectors": function(){
//The next two assertions should return document-ordered lists of matching elements --Diego Perini
//assertEquivalent(select('#list, .first,*[xml:lang="es-us"] , #troubleForm'), getById('p', 'link_1', 'list', 'item_1', 'item_3', 'troubleForm'));
//assertEquivalent(select('#list, .first, *[xml:lang="es-us"], #troubleForm'), getById('p', 'link_1', 'list', 'item_1', 'item_3', 'troubleForm'));
assertEquivalent(select('form[title*="commas,"], input[value="#commaOne,#commaTwo"]'), getById('commaParent', 'commaChild'));
assertEquivalent(select('form[title*="commas,"], input[value="#commaOne,#commaTwo"]'), getById('commaParent', 'commaChild'));
}
};

View File

@@ -0,0 +1,92 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>NWMatcher Tests</title>
<link rel="stylesheet" type="text/css" href="assets/style.css" media="screen" />
<script type="text/javascript" src="../../src/nwmatcher.js"></script>
<script type="text/javascript" src="scotch.js"></script>
<script type="text/javascript" src="test.js"></script>
</head>
<body>
<div id="container">
<div id="testlog" class="log"></div>
<!-- Test elements -->
<div id="fixtures" style="display: none;">
<h1 class="title">Some title <span>here</span></h1>
<p id="p" class="first summary">
<strong id="strong">This</strong> is a short blurb
<a id="link_1" class="first internal" rel="external nofollow" href="#">with a <em id="em2">link</em></a> or
<a id="link_2" class="internal highlight" href="#"><em id="em">two</em></a>.
Or <cite id="with_title" title="hello world!">a citation</cite>.
</p>
<ul id="list">
<li id="item_1" class="first"><a id="link_3" href="#" class="external"><span id="span">Another link</span></a></li>
<li id="item_2">Some text</li>
<li id="item_3" xml:lang="es-us" class="">Otra cosa</li>
</ul>
<!-- This form has a field with the name "id"; its "ID" property won't be "troubleForm" -->
<form id="troubleForm" action="">
<p>
<input type="hidden" name="id" id="hidden" />
<input type="text" name="disabled_text_field" id="disabled_text_field" disabled="disabled" />
<input type="text" name="enabled_text_field" id="enabled_text_field" />
<input type="checkbox" name="checkboxes" id="checked_box" checked="checked" value="Checked" />
<input type="checkbox" name="checkboxes" id="unchecked_box" value="Unchecked"/>
<input type="radio" name="radiobuttons" id="checked_radio" checked="checked" value="Checked" />
<input type="radio" name="radiobuttons" id="unchecked_radio" value="Unchecked" />
</p>
</form>
<form id="troubleForm2" action="">
<p>
<input type="checkbox" name="brackets[5][]" id="chk_1" checked="checked" value="1" />
<input type="checkbox" name="brackets[5][]" id="chk_2" value="2" />
</p>
</form>
<div id="level1">
<span id="level2_1">
<span id="level3_1"></span>
<!-- This comment should be ignored by the adjacent selector -->
<span id="level3_2"></span>
</span>
<span id="level2_2">
<em id="level_only_child">
</em>
</span>
<div id="level2_3"></div>
</div> <!-- #level1 -->
<div id="dupContainer">
<span id="dupL1" class="span_foo span_bar">
<span id="dupL2">
<span id="dupL3">
<span id="dupL4">
<span id="dupL5"></span>
</span>
</span>
</span>
</span>
</div> <!-- #dupContainer -->
<div id="grandfather"> grandfather
<div id="father" class="brothers men"> father
<div id="son"> son </div>
</div>
<div id="uncle" class="brothers men"> uncle </div>
</div>
<form id="commaParent" title="commas,are,good" action="">
<p>
<input type="hidden" id="commaChild" name="foo" value="#commaOne,#commaTwo" />
<input type="hidden" id="commaTwo" name="foo2" value="oops" />
</p>
</form>
<div id="counted_container"><div class="is_counted"></div></div>
</div>
</div>
</body>
</html>

View File

@@ -0,0 +1,132 @@
<!DOCTYPE HTML>
<html lang="en-us">
<head>
<meta http-equiv="Content-type" content="text/html; charset=utf-8">
<title>Qwery tests</title>
<style type="text/css">
#fixtures {
position: absolute;
top: -9999px;
}
</style>
<link rel="stylesheet" href="../node_modules/sink-test/src/sink.css" type="text/css">
<script src="../node_modules/sink-test/src/sink.js"></script>
<script src="../src/qwery.js"></script>
<script src="../pseudos/qwery-pseudos.js"></script>
<script type="text/javascript">
var Q = qwery
</script>
</head>
<body>
<h1>Qwery Tests</h1>
<div id="fixtures">
<ol id="list">
<li>hello</li>
<li>world</li>
<ol>
<li>world</li>
<li id="attr-child-boosh" attr="boosh">hello</li>
</ol>
<li>humans</li>
</ol>
<div id="spaced-tokens">
<p><em><a href="#"></a></em></p>
<p></p>
</div>
<div id="pseudos">
<div class="odd pseudos pseudo-1"></div>
<div class="even pseudos pseudo-2"></div>
<div class="odd"></div>
<div class="even"></div>
<a class="odd"></a>
<div class="even"></div>
<div class="odd"></div>
</div>
<div foo="bar"></div>
<div class="a"></div>
<div class="class-with-dashes"></div>
<div id="boosh">
<!-- comment -->
<!-- comment -->
<div class="a b">
<div class="d e" test="fg" id="booshTest"></div>
<!-- comment -->
<em nopass="copyrighters" rel="copyright booshrs" test="f g"></em>
<span class="h i a"></span>
</div>
<!-- comment -->
</div>
<div id="lonelyBoosh"></div>
<div id="attr-test1" -data-attr></div>
<div id="attr-test2" -data-attr></div>
<div id="attr-test3" class="found you" -data-attr title="whatup duders"></div>
<div id="attributes">
<div test="one" unique-test="baz" id="attr-test-1"></div>
<div test="two-foo" id="attr-test-2"></div>
<div test=" three " id="attr-test-3"></div>
<a href="#aname" id="attr-test-4">aname</a>
</div>
<div class="idless">
<div class="tokens" title="one" id="token-one"></div>
<div class="tokens" title="one two" id="token-two"></div>
<div class="tokens" title="one two three #%" id="token-three">
<a href="foo" id="token-four">
<div id="token-five"></div>
</a>
</div>
</div>
<div id="order-matters" class="order-matters">
<p class="order-matters"></p>
<a class="order-matters">
<em class="order-matters"></em><b class="order-matters"></b>
</a>
</div>
<div id="direct-descend" class="oogabooga">
<div></div>
<div class="direct-descend">
<span></span>
<div class="direct-descend">
<div class="lvl2" id="toodeep"><span></span></div>
</div>
<div class="direct-descend"><span></span></div>
<div class="lvl2" id="l2">
<span></span>
<div class="direct-descend"><span></span></div>
</div>
<div class="lvl2" id="l3"></div>
</div>
<div class="ignoreme"></div>
<div class="direct-descend">
<div class="direct-descend"></div>
<div class="lvl2" id="l4"></div>
</div>
<div></div>
</div>
<div id="sibling-selector"></div>
<div class="sibling-selector" id="sib1">
<div class="sibling-selector"></div>
<div class="sibling-selector"></div>
</div>
<div class="sibling-selector" id="sib2">
<div class="sibling-selector">
<div class="sibling-selector"></div>
</div>
</div>
<div class="parent">
<h1 class="sibling oldest"></h1>
<h2 class="sibling older"></h2>
<h3 class="sibling middle"></h3>
<h4 class="sibling younger"></h4>
<h5 class="sibling youngest"></h5>
</div>
<form>
<button></button>
<input type="text">
<input type="hidden">
</form>
</div>
<ol id="tests"></ol>
<iframe id="frame" style="width: 0; height: 0; margin-left: -1000px;"></iframe>
<script src="tests.js"></script>
</body>
</html>

View File

@@ -0,0 +1,548 @@
"use strict";
var expect = require("expect.js"),
DomUtils = require("htmlparser2").DomUtils,
helper = require("../helper.js"),
document = helper.getDOM(require("fs").readFileSync(__dirname + "/index.html")+""),
CSSselect = helper.CSSselect;
var location = {hash: ""};
// Custom :target pseudo for the tests: an element matches when its id equals the
// fragment stored in the `location` object declared in this file (hash minus the leading "#").
CSSselect.pseudos.target = function(elem){
	var targetId = location.hash.slice(1);
	// An empty fragment targets nothing; without this guard an element with id="" would match.
	if(targetId === "" || !elem.attribs) return false;
	return elem.attribs.id === targetId;
};
//---
/*
The following is taken from https://github.com/ded/qwery/blob/master/tests/tests.js
*/
// Test-only pseudo: matches an <li> or an <ol> whose text contains "human".
// The second argument is accepted but not used.
CSSselect.pseudos.humanoid = function(e, v) {
	var asListItem = CSSselect.is(e, 'li:contains(human)');
	if (asListItem) {
		return asListItem;
	}
	return CSSselect.is(e, 'ol:contains(human)');
};
var frag = helper.getDOM(
'<div class="d i v">' +
'<p id="oooo"><em></em><em id="emem"></em></p>' +
'</div>' +
'<p id="sep">' +
'<div class="a"><span></span></div>' +
'</p>'
);
var doc = helper.getDOM(
'<div id="hsoob">' +
'<div class="a b">' +
'<div class="d e sib" test="fg" id="booshTest"><p><span id="spanny"></span></p></div>' +
'<em nopass="copyrighters" rel="copyright booshrs" test="f g" class="sib"></em>' +
'<span class="h i a sib"></span>' +
'</div>' +
'<p class="odd"></p>' +
'</div>' +
'<div id="lonelyHsoob"></div>'
);
var el = DomUtils.getElementById('attr-child-boosh', document);
var pseudos = DomUtils.getElementById('pseudos', document).children;
module.exports = {
'Contexts': {
'should be able to pass optional context': function () {
expect(CSSselect('.a', document)).to.have.length(3); //no context found 3 elements (.a)
expect(CSSselect('.a', CSSselect('#boosh', document))).to.have.length(2); //context found 2 elements (#boosh .a)
},
/*
'should be able to pass string as context': function() {
expect(CSSselect('.a', '#boosh')).to.have.length(2); //context found 2 elements(.a, #boosh)
expect(CSSselect('.a', '.a')).to.be.empty(); //context found 0 elements(.a, .a)
expect(CSSselect('.a', '.b')).to.have.length(1); //context found 1 elements(.a, .b)
expect(CSSselect('.a', '#boosh .b')).to.have.length(1); //context found 1 elements(.a, #boosh .b)
expect(CSSselect('.b', '#boosh .b')).to.be.empty(); //context found 0 elements(.b, #boosh .b)
},
*/
/*
'should be able to pass qwery result as context': function() {
expect(CSSselect('.a', CSSselect('#boosh', document))).to.have.length(2); //context found 2 elements(.a, #boosh)
expect(CSSselect('.a', CSSselect('.a', document))).to.be.empty(); //context found 0 elements(.a, .a)
expect(CSSselect('.a', CSSselect('.b', document))).to.have.length(1); //context found 1 elements(.a, .b)
expect(CSSselect('.a', CSSselect('#boosh .b', document))).to.have.length(1); //context found 1 elements(.a, #boosh .b)
expect(CSSselect('.b', CSSselect('#boosh .b', document))).to.be.empty(); //context found 0 elements(.b, #boosh .b)
},
*/
'should not return duplicates from combinators': function () {
expect(CSSselect('#boosh,#boosh', document)).to.have.length(1); //two booshes dont make a thing go right
expect(CSSselect('#boosh,.apples,#boosh', document)).to.have.length(1); //two booshes and an apple dont make a thing go right
},
'byId sub-queries within context': function() {
expect(CSSselect('#booshTest', CSSselect('#boosh', document))).to.have.length(1); //found "#id #id"
expect(CSSselect('.a.b #booshTest', CSSselect('#boosh', document))).to.have.length(1); //found ".class.class #id"
expect(CSSselect('.a>#booshTest', CSSselect('#boosh', document))).to.have.length(1); //found ".class>#id"
expect(CSSselect('>.a>#booshTest', CSSselect('#boosh', document))).to.have.length(1); //found ">.class>#id"
expect(CSSselect('#boosh', CSSselect('#booshTest', document)).length).to.not.be.ok(); //shouldn't find #boosh (ancestor) within #booshTest (descendent)
expect(CSSselect('#boosh', CSSselect('#lonelyBoosh', document)).length).to.not.be.ok(); //shouldn't find #boosh within #lonelyBoosh (unrelated)
}
},
'CSS 1': {
'get element by id': function () {
var result = CSSselect('#boosh', document);
expect(result[0]).to.be.ok(); //found element with id=boosh
expect(CSSselect('h1', document)[0]).to.be.ok(); //found 1 h1
},
'byId sub-queries': function() {
expect(CSSselect('#boosh #booshTest', document)).to.have.length(1); //found "#id #id"
expect(CSSselect('.a.b #booshTest', document)).to.have.length(1); //found ".class.class #id"
expect(CSSselect('#boosh>.a>#booshTest', document)).to.have.length(1); //found "#id>.class>#id"
expect(CSSselect('.a>#booshTest', document)).to.have.length(1); //found ".class>#id"
},
'get elements by class': function () {
expect(CSSselect('#boosh .a', document)).to.have.length(2); //found two elements
expect(CSSselect('#boosh div.a', document)[0]).to.be.ok(); //found one element
expect(CSSselect('#boosh div', document)).to.have.length(2); //found two {div} elements
expect(CSSselect('#boosh span', document)[0]).to.be.ok(); //found one {span} element
expect(CSSselect('#boosh div div', document)[0]).to.be.ok(); //found a single div
expect(CSSselect('a.odd', document)).to.have.length(1); //found single a
},
'combos': function () {
expect(CSSselect('#boosh div,#boosh span', document)).to.have.length(3); //found 2 divs and 1 span
},
'class with dashes': function() {
expect(CSSselect('.class-with-dashes', document)).to.have.length(1); //found something
},
'should ignore comment nodes': function() {
expect(CSSselect('#boosh *', document)).to.have.length(4); //found only 4 elements under #boosh
},
'deep messy relationships': function() {
// these are mostly characterised by a combination of tight relationships and loose relationships
// on the right side of the query it's easy to find matches but they tighten up quickly as you
// go to the left
// they are useful for making sure the dom crawler doesn't stop short or over-extend as it works
// up the tree the crawl needs to be comprehensive
expect(CSSselect('div#fixtures > div a', document)).to.have.length(5); //found four results for "div#fixtures > div a"
expect(CSSselect('.direct-descend > .direct-descend .lvl2', document)).to.have.length(1); //found one result for ".direct-descend > .direct-descend .lvl2"
expect(CSSselect('.direct-descend > .direct-descend div', document)).to.have.length(1); //found one result for ".direct-descend > .direct-descend div"
expect(CSSselect('.direct-descend > .direct-descend div', document)).to.have.length(1); //found one result for ".direct-descend > .direct-descend div"
expect(CSSselect('div#fixtures div ~ a div', document)).to.be.empty(); //found no results for odd query
expect(CSSselect('.direct-descend > .direct-descend > .direct-descend ~ .lvl2', document)).to.be.empty(); //found no results for another odd query
}
},
'CSS 2': {
'get elements by attribute': function () {
var wanted = CSSselect('#boosh div[test]', document)[0];
var expected = DomUtils.getElementById('booshTest', document);
expect(wanted).to.be(expected); //found attribute
expect(CSSselect('#boosh div[test=fg]', document)[0]).to.be(expected); //found attribute with value
expect(CSSselect('em[rel~="copyright"]', document)).to.have.length(1); //found em[rel~="copyright"]
expect(CSSselect('em[nopass~="copyright"]', document)).to.be.empty(); //found em[nopass~="copyright"]
},
'should not throw error by attribute selector': function () {
expect(CSSselect('[foo^="bar"]', document)).to.have.length(1); //found 1 element
},
'crazy town': function () {
var el = DomUtils.getElementById('attr-test3', document);
expect(CSSselect('div#attr-test3.found.you[title="whatup duders"]', document)[0]).to.be(el); //found the right element
}
},
'attribute selectors': {
/* CSS 2 SPEC */
'[attr]': function () {
var expected = DomUtils.getElementById('attr-test-1', document);
expect(CSSselect('#attributes div[unique-test]', document)[0]).to.be(expected); //found attribute with [attr]
},
'[attr=val]': function () {
var expected = DomUtils.getElementById('attr-test-2', document);
expect(CSSselect('#attributes div[test="two-foo"]', document)[0]).to.be(expected); //found attribute with =
expect(CSSselect("#attributes div[test='two-foo']", document)[0]).to.be(expected); //found attribute with =
expect(CSSselect('#attributes div[test=two-foo]', document)[0]).to.be(expected); //found attribute with =
},
'[attr~=val]': function () {
var expected = DomUtils.getElementById('attr-test-3', document);
expect(CSSselect('#attributes div[test~=three]', document)[0]).to.be(expected); //found attribute with ~=
},
'[attr|=val]': function () {
var expected = DomUtils.getElementById('attr-test-2', document);
expect(CSSselect('#attributes div[test|="two-foo"]', document)[0]).to.be(expected); //found attribute with |=
expect(CSSselect('#attributes div[test|=two]', document)[0]).to.be(expected); //found attribute with |=
},
'[href=#x] special case': function () {
var expected = DomUtils.getElementById('attr-test-4', document);
expect(CSSselect('#attributes a[href="#aname"]', document)[0]).to.be(expected); //found attribute with href=#x
},
/* CSS 3 SPEC */
'[attr^=val]': function () {
var expected = DomUtils.getElementById('attr-test-2', document);
expect(CSSselect('#attributes div[test^=two]', document)[0]).to.be(expected); //found attribute with ^=
},
'[attr$=val]': function () {
var expected = DomUtils.getElementById('attr-test-2', document);
expect(CSSselect('#attributes div[test$=foo]', document)[0]).to.be(expected); //found attribute with $=
},
'[attr*=val]': function () {
var expected = DomUtils.getElementById('attr-test-3', document);
expect(CSSselect('#attributes div[test*=hree]', document)[0]).to.be(expected); //found attribute with *=
},
'direct descendants': function () {
expect(CSSselect('#direct-descend > .direct-descend', document)).to.have.length(2); //found two direct descendents
expect(CSSselect('#direct-descend > .direct-descend > .lvl2', document)).to.have.length(3); //found three second-level direct descendents
},
'sibling elements': function () {
expect(CSSselect('#sibling-selector ~ .sibling-selector', document)).to.have.length(2); //found two siblings
expect(CSSselect('#sibling-selector ~ div.sibling-selector', document)).to.have.length(2); //found two siblings
expect(CSSselect('#sibling-selector + div.sibling-selector', document)).to.have.length(1); //found one sibling
expect(CSSselect('#sibling-selector + .sibling-selector', document)).to.have.length(1); //found one sibling
expect(CSSselect('.parent .oldest ~ .sibling', document)).to.have.length(4); //found four younger siblings
expect(CSSselect('.parent .middle ~ .sibling', document)).to.have.length(2); //found two younger siblings
expect(CSSselect('.parent .middle ~ h4', document)).to.have.length(1); //found next sibling by tag
expect(CSSselect('.parent .middle ~ h4.younger', document)).to.have.length(1); //found next sibling by tag and class
expect(CSSselect('.parent .middle ~ h3', document)).to.be.empty(); //an element can't be its own sibling
expect(CSSselect('.parent .middle ~ h2', document)).to.be.empty(); //didn't find an older sibling
expect(CSSselect('.parent .youngest ~ .sibling', document)).to.be.empty(); //found no younger siblings
expect(CSSselect('.parent .oldest + .sibling', document)).to.have.length(1); //found next sibling
expect(CSSselect('.parent .middle + .sibling', document)).to.have.length(1); //found next sibling
expect(CSSselect('.parent .middle + h4', document)).to.have.length(1); //found next sibling by tag
expect(CSSselect('.parent .middle + h3', document)).to.be.empty(); //an element can't be its own sibling
expect(CSSselect('.parent .middle + h2', document)).to.be.empty(); //didn't find an older sibling
expect(CSSselect('.parent .youngest + .sibling', document)).to.be.empty(); //found no younger siblings
}
},
/*
'Uniq': {
'duplicates arent found in arrays': function () {
expect(CSSselect.uniq(['a', 'b', 'c', 'd', 'e', 'a', 'b', 'c', 'd', 'e'])).to.have.length(5); //result should be a, b, c, d, e
expect(CSSselect.uniq(['a', 'b', 'c', 'c', 'c'])).to.have.length(3); //result should be a, b, c
}
},
*/
'element-context queries': {
/*
'relationship-first queries': function() {
expect(CSSselect('> .direct-descend', CSSselect('#direct-descend', document))).to.have.length(2); //found two direct descendents using > first
expect(CSSselect('~ .sibling-selector', CSSselect('#sibling-selector', document))).to.have.length(2); //found two siblings with ~ first
expect(CSSselect('+ .sibling-selector', CSSselect('#sibling-selector', document))).to.have.length(1); //found one sibling with + first
expect(CSSselect('> .tokens a', CSSselect('.idless', document)[0])).to.have.length(1); //found one sibling from a root with no id
},
*/
// should be able to query on an element that hasn't been inserted into the dom
'detached fragments': function() {
expect(CSSselect('.a span', frag)).to.have.length(1); //should find child elements of fragment
//expect(CSSselect('> div p em', frag)).to.have.length(2); //should find child elements of fragment, relationship first
},
'byId sub-queries within detached fragment': function () {
expect(CSSselect('#emem', frag)).to.have.length(1); //found "#id" in fragment
expect(CSSselect('.d.i #emem', frag)).to.have.length(1); //found ".class.class #id" in fragment
expect(CSSselect('.d #oooo #emem', frag)).to.have.length(1); //found ".class #id #id" in fragment
//expect(CSSselect('> div #oooo', frag)).to.have.length(1); //found "> .class #id" in fragment
expect(CSSselect('#oooo', CSSselect('#emem', frag)).length).to.not.be.ok(); //shouldn't find #oooo (ancestor) within #emem (descendent)
expect(CSSselect('#sep', CSSselect('#emem', frag)).length).to.not.be.ok(); //shouldn't find #sep within #emem (unrelated)
},
/*
'exclude self in match': function() {
expect(CSSselect('.order-matters', CSSselect('#order-matters', document))).to.have.length(4); //should not include self in element-context queries
},
*/
// because forms have .length
// Passes a single form element (not an array of results) as the context.
'forms can be used as contexts': function() {
expect(CSSselect('*', CSSselect('form', document)[0])).to.have.length(3); //found 3 elements under <form>
}
},
'tokenizer': {
'should not get weird tokens': function () {
expect(CSSselect('div .tokens[title="one"]', document)[0]).to.be(DomUtils.getElementById('token-one', document)); //found div .tokens[title="one"]
expect(CSSselect('div .tokens[title="one two"]', document)[0]).to.be(DomUtils.getElementById('token-two', document)); //found div .tokens[title="one two"]
expect(CSSselect('div .tokens[title="one two three #%"]', document)[0]).to.be(DomUtils.getElementById('token-three', document)); //found div .tokens[title="one two three #%"]
expect(CSSselect("div .tokens[title='one two three #%'] a", document)[0]).to.be(DomUtils.getElementById('token-four', document)); //found div .tokens[title=\'one two three #%\'] a
expect(CSSselect('div .tokens[title="one two three #%"] a[href$=foo] div', document)[0]).to.be(DomUtils.getElementById('token-five', document)); //found div .tokens[title="one two three #%"] a[href=foo] div
}
},
'interesting syntaxes': {
'should parse bad selectors': function () {
expect(CSSselect('#spaced-tokens p em a', document).length).to.be.ok(); //found element with funny tokens
}
},
'order matters': {
// <div id="order-matters">
// <p class="order-matters"></p>
// <a class="order-matters">
// <em class="order-matters"></em><b class="order-matters"></b>
// </a>
// </div>
// Results must come back in the order p, a, em, b.
'the order of elements return matters': function () {
// Lower-cased tag name of a parsed element.
function tag(el) {
return el.name.toLowerCase();
}
var els = CSSselect('#order-matters .order-matters', document);
expect(tag(els[0])).to.be('p'); //first element matched is a {p} tag
expect(tag(els[1])).to.be('a'); //second element matched is an {a} tag
expect(tag(els[2])).to.be('em'); //third element matched is an {em} tag
expect(tag(els[3])).to.be('b'); //fourth element matched is a {b} tag
}
},
'pseudo-selectors': {
':contains': function() {
expect(CSSselect('li:contains(humans)', document)).to.have.length(1); //found by "element:contains(text)"
expect(CSSselect(':contains(humans)', document)).to.have.length(5); //found by ":contains(text)", including all ancestors
// * is an important case, can cause weird errors
expect(CSSselect('*:contains(humans)', document)).to.have.length(5); //found by "*:contains(text)", including all ancestors
expect(CSSselect('ol:contains(humans)', document)).to.have.length(1); //found by "ancestor:contains(text)"
},
// :not() with a tag selector must exclude the <div> elements carrying class "odd".
':not': function() {
expect(CSSselect('.odd:not(div)', document)).to.have.length(1); //found one .odd that is :not a <div>
},
':first-child': function () {
expect(CSSselect('#pseudos div:first-child', document)[0]).to.be(pseudos[0]); //found first child
expect(CSSselect('#pseudos div:first-child', document)).to.have.length(1); //found only 1
},
':last-child': function () {
var all = DomUtils.getElementsByTagName('div', pseudos);
expect(CSSselect('#pseudos div:last-child', document)[0]).to.be(all[all.length - 1]); //found last child
expect(CSSselect('#pseudos div:last-child', document)).to.have.length(1); //found only 1
},
'ol > li[attr="boosh"]:last-child': function () {
var expected = DomUtils.getElementById('attr-child-boosh', document);
expect(CSSselect('ol > li[attr="boosh"]:last-child', document)).to.have.length(1); //only 1 element found
expect(CSSselect('ol > li[attr="boosh"]:last-child', document)[0]).to.be(expected); //found correct element
},
':nth-child(odd|even|x)': function () {
var second = DomUtils.getElementsByTagName('div', pseudos)[1];
expect(CSSselect('#pseudos :nth-child(odd)', document)).to.have.length(4); //found 4 odd elements
expect(CSSselect('#pseudos div:nth-child(odd)', document)).to.have.length(3); //found 3 odd elements with div tag
expect(CSSselect('#pseudos div:nth-child(even)', document)).to.have.length(3); //found 3 even elements with div tag
expect(CSSselect('#pseudos div:nth-child(2)', document)[0]).to.be(second); //found 2nd nth-child of pseudos
},
':nth-child(expr)': function () {
var fifth = DomUtils.getElementsByTagName('a', pseudos)[0];
var sixth = DomUtils.getElementsByTagName('div', pseudos)[4];
expect(CSSselect('#pseudos :nth-child(3n+1)', document)).to.have.length(3); //found 3 elements
expect(CSSselect('#pseudos :nth-child(+3n-2)', document)).to.have.length(3); //found 3 elements'
expect(CSSselect('#pseudos :nth-child(-n+6)', document)).to.have.length(6); //found 6 elements
expect(CSSselect('#pseudos :nth-child(-n+5)', document)).to.have.length(5); //found 5 elements
expect(CSSselect('#pseudos :nth-child(3n+2)', document)[1]).to.be(fifth); //second :nth-child(3n+2) is the fifth child
expect(CSSselect('#pseudos :nth-child(3n)', document)[1]).to.be(sixth); //second :nth-child(3n) is the sixth child
},
':nth-last-child(odd|even|x)': function () {
var second = DomUtils.getElementsByTagName('div', pseudos)[1];
expect(CSSselect('#pseudos :nth-last-child(odd)', document)).to.have.length(4); //found 4 odd elements
expect(CSSselect('#pseudos div:nth-last-child(odd)', document)).to.have.length(3); //found 3 odd elements with div tag
expect(CSSselect('#pseudos div:nth-last-child(even)', document)).to.have.length(3); //found 3 even elements with div tag
expect(CSSselect('#pseudos div:nth-last-child(6)', document)[0]).to.be(second); //6th nth-last-child should be 2nd of 7 elements
},
':nth-last-child(expr)': function () {
var third = DomUtils.getElementsByTagName('div', pseudos)[2];
expect(CSSselect('#pseudos :nth-last-child(3n+1)', document)).to.have.length(3); //found 3 elements
expect(CSSselect('#pseudos :nth-last-child(3n-2)', document)).to.have.length(3); //found 3 elements
expect(CSSselect('#pseudos :nth-last-child(-n+6)', document)).to.have.length(6); //found 6 elements
expect(CSSselect('#pseudos :nth-last-child(-n+5)', document)).to.have.length(5); //found 5 elements
expect(CSSselect('#pseudos :nth-last-child(3n+2)', document)[0]).to.be(third); //first :nth-last-child(3n+2) is the third child
},
':nth-of-type(expr)': function () {
var a = DomUtils.getElementsByTagName('a', pseudos)[0];
expect(CSSselect('#pseudos div:nth-of-type(3n+1)', document)).to.have.length(2); //found 2 div elements
expect(CSSselect('#pseudos a:nth-of-type(3n+1)', document)).to.have.length(1); //found 1 a element
expect(CSSselect('#pseudos a:nth-of-type(3n+1)', document)[0]).to.be(a); //found the right a element
expect(CSSselect('#pseudos a:nth-of-type(3n)', document)).to.be.empty(); //no matches for every third a
expect(CSSselect('#pseudos a:nth-of-type(odd)', document)).to.have.length(1); //found the odd a
expect(CSSselect('#pseudos a:nth-of-type(1)', document)).to.have.length(1); //found the first a
},
':nth-last-of-type(expr)': function () {
var second = DomUtils.getElementsByTagName('div', pseudos)[1];
expect(CSSselect('#pseudos div:nth-last-of-type(3n+1)', document)).to.have.length(2); //found 2 div elements
expect(CSSselect('#pseudos a:nth-last-of-type(3n+1)', document)).to.have.length(1); //found 1 a element
expect(CSSselect('#pseudos div:nth-last-of-type(5)', document)[0]).to.be(second); //5th nth-last-of-type should be 2nd of 7 elements
},
':first-of-type': function () {
expect(CSSselect('#pseudos a:first-of-type', document)[0]).to.be(DomUtils.getElementsByTagName('a', pseudos)[0]); //found first a element
expect(CSSselect('#pseudos a:first-of-type', document)).to.have.length(1); //found only 1
},
':last-of-type': function () {
var all = DomUtils.getElementsByTagName('div', pseudos);
expect(CSSselect('#pseudos div:last-of-type', document)[0]).to.be(all[all.length - 1]); //found last div element
expect(CSSselect('#pseudos div:last-of-type', document)).to.have.length(1); //found only 1
},
':only-of-type': function () {
expect(CSSselect('#pseudos a:only-of-type', document)[0]).to.be(DomUtils.getElementsByTagName('a', pseudos)[0]); //found the only a element
expect(CSSselect('#pseudos a:first-of-type', document)).to.have.length(1); //found only 1
},
':target': function () {
location.hash = '';
expect(CSSselect('#pseudos:target', document)).to.be.empty(); //#pseudos is not the target
location.hash = '#pseudos';
expect(CSSselect('#pseudos:target', document)).to.have.length(1); //now #pseudos is the target
location.hash = '';
},
'custom pseudos': function() {
// :humanoid implemented just for testing purposes
expect(CSSselect(':humanoid', document)).to.have.length(2); //selected using custom pseudo
}
},
/*
'argument types': {
'should be able to pass in nodes as arguments': function () {
var el = DomUtils.getElementById('boosh', document);
expect(CSSselect(el)[0]).to.be(el); //CSSselect(el)[0] == el
expect(CSSselect(el, 'body')[0]).to.be(el); //CSSselect(el, 'body')[0] == el
expect(CSSselect(el, document)[0]).to.be(el); //CSSselect(el, document)[0] == el
expect(CSSselect(window)[0]).to.be(window); //CSSselect(window)[0] == window
expect(CSSselect(document)[0]).to.be(document); //CSSselect(document)[0] == document
},
'should be able to pass in an array of results as arguments': function () {
var el = DomUtils.getElementById('boosh', document);
var result = CSSselect([CSSselect('#boosh', document), CSSselect(document), CSSselect(window)]);
expect(result).to.have.length(3); //3 elements in the combined set
expect(result[0]).to.be(el); //result[0] == el
expect(result[1]).to.be(document); //result[0] == document
expect(result[2]).to.be(window); //result[0] == window
expect(CSSselect([CSSselect('#pseudos div.odd', document), CSSselect('#pseudos div.even', document)])).to.have.length(6); //found all the odd and even divs
}
},
*/
'is()': {
'simple selectors': function () {
expect(CSSselect.is(el, 'li')).to.be.ok(); //tag
expect(CSSselect.is(el, '*')).to.be.ok(); //wildcard
expect(CSSselect.is(el, '#attr-child-boosh')).to.be.ok(); //#id
expect(CSSselect.is(el, '[attr]')).to.be.ok(); //[attr]
expect(CSSselect.is(el, '[attr=boosh]')).to.be.ok(); //[attr=val]
expect(CSSselect.is(el, 'div')).to.not.be.ok(); //wrong tag
expect(CSSselect.is(el, '#foo')).to.not.be.ok(); //wrong #id
expect(CSSselect.is(el, '[foo]')).to.not.be.ok(); //wrong [attr]
expect(CSSselect.is(el, '[attr=foo]')).to.not.be.ok(); //wrong [attr=val]
},
'selector sequences': function () {
expect(CSSselect.is(el, 'li#attr-child-boosh[attr=boosh]')).to.be.ok(); //tag#id[attr=val]
expect(CSSselect.is(el, 'div#attr-child-boosh[attr=boosh]')).to.not.be.ok(); //wrong tag#id[attr=val]
},
// CSSselect.is() with combinators; `el` is the #attr-child-boosh element looked up at the top of this file.
'selector sequences combinators': function () {
expect(CSSselect.is(el, 'ol li')).to.be.ok(); //tag tag
expect(CSSselect.is(el, 'ol>li')).to.be.ok(); //tag>tag
expect(CSSselect.is(el, 'ol>li+li')).to.be.ok(); //tag>tag+tag
expect(CSSselect.is(el, 'ol#list li#attr-child-boosh[attr=boosh]')).to.be.ok(); //tag#id tag#id[attr=val]
expect(CSSselect.is(el, 'ol#list>li#attr-child-boosh[attr=boosh]')).to.not.be.ok(); //wrong tag#id>tag#id[attr=val]
expect(CSSselect.is(el, 'ol ol li#attr-child-boosh[attr=boosh]')).to.be.ok(); //tag tag tag#id[attr=val]
expect(CSSselect.is(CSSselect('#token-four', document)[0], 'div#fixtures>div a')).to.be.ok(); //tag#id>tag tag where ambiguous middle tag requires backtracking
},
'pseudos': function() {
//TODO: more tests!
expect(CSSselect.is(el, 'li:contains(hello)')).to.be.ok(); //matching :contains(text)
expect(CSSselect.is(el, 'li:contains(human)')).to.not.be.ok(); //non-matching :contains(text)
expect(CSSselect.is(CSSselect('#list>li', document)[2], ':humanoid')).to.be.ok(); //matching custom pseudo
expect(CSSselect.is(CSSselect('#list>li', document)[1], ':humanoid')).to.not.be.ok(); //non-matching custom pseudo
}/*,
'context': function () {
expect(CSSselect.is(el, 'li#attr-child-boosh[attr=boosh]', CSSselect('#list', document)[0])).to.be.ok(); //context
expect(CSSselect.is(el, 'ol#list li#attr-child-boosh[attr=boosh]', CSSselect('#boosh', document)[0])).to.not.be.ok(); //wrong context
}*/
},
'selecting elements in other documents': {
'get element by id': function () {
var result = CSSselect('#hsoob', doc);
expect(result[0]).to.be.ok(); //found element with id=hsoob
},
// Class and tag queries against the separately parsed `doc` fragment.
'get elements by class': function () {
expect(CSSselect('#hsoob .a', doc)).to.have.length(2); //found two elements
expect(CSSselect('#hsoob div.a', doc)[0]).to.be.ok(); //found one element
expect(CSSselect('#hsoob div', doc)).to.have.length(2); //found two {div} elements
expect(CSSselect('#hsoob span', doc)[0]).to.be.ok(); //found one {span} element
expect(CSSselect('#hsoob div div', doc)[0]).to.be.ok(); //found a single div
expect(CSSselect('p.odd', doc)).to.have.length(1); //found single p
},
// Sibling (~, +) and child (>) combinators against the separately parsed `doc` fragment.
'complex selectors': function () {
expect(CSSselect('.d ~ .sib', doc)).to.have.length(2); //found 2 ~ siblings
expect(CSSselect('.a .d + .sib', doc)).to.have.length(1); //found 1 + sibling
expect(CSSselect('#hsoob > div > .h', doc)).to.have.length(1); //found span using child selectors
expect(CSSselect('.a .d ~ .sib[test="f g"]', doc)).to.have.length(1); //found 1 ~ sibling with test attribute
},
'byId sub-queries': function () {
expect(CSSselect('#hsoob #spanny', doc)).to.have.length(1); //found "#id #id" in frame
expect(CSSselect('.a #spanny', doc)).to.have.length(1); //found ".class #id" in frame
expect(CSSselect('.a #booshTest #spanny', doc)).to.have.length(1); //found ".class #id #id" in frame
//ok(CSSselect('> #hsoob', doc).length == 1, 'found "> #id" in frame') --> would be good to support this, needs some tweaking though
},
'byId sub-queries within sub-context': function () {
expect(CSSselect('#spanny', CSSselect('#hsoob', doc))).to.have.length(1); //found "#id -> #id" in frame
expect(CSSselect('.a #spanny', CSSselect('#hsoob', doc))).to.have.length(1); //found ".class #id" in frame
expect(CSSselect('.a #booshTest #spanny', CSSselect('#hsoob', doc))).to.have.length(1); //found ".class #id #id" in frame
expect(CSSselect('.a > #booshTest', CSSselect('#hsoob', doc))).to.have.length(1); //found "> .class #id" in frame
expect(CSSselect('#booshTest', CSSselect('#spanny', doc)).length).to.not.be.ok(); //shouldn't find #booshTest (ancestor) within #spanny (descendent)
expect(CSSselect('#booshTest', CSSselect('#lonelyHsoob', doc)).length).to.not.be.ok(); //shouldn't find #booshTest within #lonelyHsoob (unrelated)
}
}
};

View File

@@ -0,0 +1,76 @@
var helper = require("./helper.js"),
doc = helper.getFile("W3C_Selectors.html"),
CSSselect = require("../"),
soupselect = require("cheerio-soupselect"),
selectors = ["body", "div", "body div", "div p", "div > p", "div + p", "div ~ p", "div[class^=exa][class$=mple]", "div p a", "div, p, a", ".note", "div.example", "ul .tocline2", "div.example, div.note", "#title", "h1#title", "div #title", "ul.toc li.tocline2", "ul.toc > li.tocline2", "h1#title + div > p", "h1[id]:contains(Selectors)", "a[href][lang][class]", "div[class]", "div[class=example]", "div[class^=exa]", "div[class$=mple]", "div[class*=e]", "div[class|=dialog]", "div[class!=made_up]", "div[class~=example]"/*, "div:not(.example)", "p:contains(selectors)", "p:nth-child(even)", "p:nth-child(2n)", "p:nth-child(odd)", "p:nth-child(2n+1)", "p:nth-child(n)", "p:only-child", "p:last-child", "p:first-child"*/];
var engines = [function(a,b){return CSSselect.iterate(b,a);}, soupselect.select];
//returns true when an error occurs
/**
 * Filter predicate: runs `rule` through every selector engine and reports
 * whether the engines disagree.
 *
 * @param {string} rule - CSS selector to evaluate against the shared document.
 * @param {number} index - position supplied by Array#filter (unused).
 * @returns {boolean} true when two neighbouring engines returned a different
 *   number of matches, or one returned a node the other did not; false otherwise.
 */
function testResult(rule, index){
	var outputs = [];
	engines.forEach(function(engine){
		outputs.push(engine(doc, rule));
	});

	//compare every engine's output with the one before it
	//TODO: might be hard to debug with more engines
	for(var k = 1; k < outputs.length; k++){
		var previous = outputs[k - 1],
		    current = outputs[k];

		if(previous.length !== current.length){
			return true;
		}
		for(var n = 0; n < current.length; n++){
			var node = current[n];
			//a different position is tolerated, a missing node is not
			if(previous[n] !== node && previous.indexOf(node) === -1){
				return true;
			}
		}
	}
	return false;
}
selectors.filter(testResult).forEach(function(rule){ print(rule, "failed!\n"); });
process.exit(0); //don't run speed tests
print("-----\n\nChecking performance\n\n");
//test the speed
var ben = require("ben");
/**
 * Benchmarks one selector against every engine (plus a precompiled CSSselect
 * variant) and prints one table row with the timings.
 *
 * Markers: `+x+` fastest, `!x!` slowest, `~x~` closer to the fastest,
 * `=x=` closer to the slowest.
 *
 * @param {string} rule - CSS selector to benchmark.
 */
function testSpeed(rule){
	//pad the rule to a fixed column width; the width is clamped because
	//Array(n) throws a RangeError for negative n (rules longer than 28 chars)
	print(rule, Array(Math.max(28 - rule.length, 0)).join(" "));

	var results = engines
		.map(function(func){ return function(){ return func(doc, rule); }});

	//also add a precompiled CSSselect test
	var compiled = CSSselect(rule);
	results.unshift(function(){ return CSSselect.iterate(compiled, doc); });

	results = results.map(ben);

	var min = Math.min.apply(null, results);
	var max = Math.max.apply(null, results);

	results.forEach(function(result){
		if(result === min) return print(" +", result, "+");
		if(result === max) return print(" !", result, "!");
		if(Math.abs(result-min) > Math.abs(result-max)){
			return print(" =", result, "=");
		}
		print(" ~", result, "~");
	});

	print("\n");
}
print("RULE ", "CSSselect (pc)", "CSSselect", "soupselect\n");
selectors.forEach(testSpeed);
/**
 * Writes all arguments to stdout, separated by single spaces and without a
 * trailing newline.
 */
function print(){
	var pieces = Array.prototype.slice.call(arguments);
	var line = pieces.join(" ");
	process.stdout.write(line);
}

View File

@@ -0,0 +1,34 @@
{
"name": "cheerio-select",
"version": "0.0.3",
"description": "Selector engine for cheerio",
"keywords": [],
"author": {
"name": "Matt Mueller",
"email": "mattmuelle@gmail.com"
},
"dependencies": {
"CSSselect": "0.x"
},
"devDependencies": {
"mocha": "*",
"cheerio": "*",
"expect.js": "*",
"underscore": "*"
},
"main": "index",
"engines": {
"node": ">= 0.4.7"
},
"scripts": {
"test": "make test"
},
"readme": "\n# cheerio-select [![Build Status](https://secure.travis-ci.org/MatthewMueller/cheerio-select.png?branch=master)](http://travis-ci.org/MatthewMueller/cheerio-select)\n\n Tiny wrapper around FB55's excellent [CSSselect](https://github.com/FB55/CSSselect) library.\n\n cheerio-select provides a comprehensive test suite based on sizzle's test suite. \n\n > Warning: Currently, not all tests pass, and some sizzle features will not be supported\n\n## Usage\n\n var select = require('cheerio-select'),\n parse = require('cheerio').parse,\n dom = parse('<ul id = \"fruits\"><li class = \"apple\">Apple</li></ul>');\n\n select('#fruits > .apple', dom);\n => [{...}]\n\n## TODO \n\n* Get all the unit tests to pass!\n\n## Run tests\n\n npm install\n make test\n\n## License \n\n(The MIT License)\n\nCopyright (c) 2012 Matt Mueller &lt;mattmuelle@gmail.com&gt;\n\nPermission is hereby granted, free of charge, to any person obtaining\na copy of this software and associated documentation files (the\n'Software'), to deal in the Software without restriction, including\nwithout limitation the rights to use, copy, modify, merge, publish,\ndistribute, sublicense, and/or sell copies of the Software, and to\npermit persons to whom the Software is furnished to do so, subject to\nthe following conditions:\n\nThe above copyright notice and this permission notice shall be\nincluded in all copies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,\nEXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\nMERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.\nIN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY\nCLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,\nTORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE\nSOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.",
"readmeFilename": "Readme.md",
"_id": "cheerio-select@0.0.3",
"dist": {
"shasum": "84b5fc11cb2f2ab67bfa917439b918200721c3ce"
},
"_from": "cheerio-select@*",
"_resolved": "https://registry.npmjs.org/cheerio-select/-/cheerio-select-0.0.3.tgz"
}

11
node_modules/cheerio/node_modules/entities/LICENSE generated vendored Normal file
View File

@@ -0,0 +1,11 @@
Copyright (c) Felix Böhm
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
THIS IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS,
EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1 @@
{"amp;":"\u0026","apos;":"\u0027","gt;":"\u003e","lt;":"\u003c","quot;":"\u0022"}

79
node_modules/cheerio/node_modules/entities/index.js generated vendored Normal file
View File

@@ -0,0 +1,79 @@
//Patterns for numeric character references; the "strict" variants require
//the terminating semicolon, the others accept it as optional.
var re_hex = /&#x[\da-f]+;?/gi,
    re_strictHex = /&#x[\da-f]+;/gi,
    re_charCode = /&#\d+;?/g,
    re_strictCharCode = /&#\d+;/g,
    //characters that get rewritten as decimal references (see charCode_func)
    re_notUTF8 = /[\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff\u0370-\u037d\u037f-\u1fff\u200c\u200d\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]/g,
    fromCharCode = String.fromCharCode,
    //String.fromCharCode truncates its argument to 16 bits, so code points
    //above U+FFFF have to be emitted as a UTF-16 surrogate pair. Everything
    //else is passed straight through, exactly as before.
    fromCodePoint = function(code){
        if(code > 0xFFFF && code <= 0x10FFFF){
            code -= 0x10000;
            return fromCharCode(0xD800 + (code >> 10), 0xDC00 + (code & 0x3FF));
        }
        return fromCharCode(code);
    },
    //"&#255;" or "&#255" -> character (parseInt stops at the optional ";")
    num_func = function(num){return fromCodePoint(parseInt(num.substr(2), 10));},
    //"&#xff;" or "&#xff" -> character
    hex_func = function(hex){return fromCodePoint(parseInt(hex.substr(3), 16));},
    //"&#255;" -> character (semicolon required, stripped by slice)
    strictNum_func = function(num){return fromCodePoint(parseInt(num.slice(2, -1), 10));},
    //"&#xff;" -> character (semicolon required, stripped by slice)
    strictHex_func = function(num){return fromCodePoint(parseInt(num.slice(3, -1), 16));},
    //single character -> decimal reference, e.g. "\u00ff" -> "&#255;"
    charCode_func = function(c){ return "&#" +c.charCodeAt(0) +";";};
/**
 * Loads one entity table and builds the matcher used to decode it.
 *
 * @param {string} filename - base name of the JSON table in ./entities/.
 * @param {Object} [inherits] - entries copied onto the loaded table
 *   (overwriting entries with the same key).
 * @returns {{func: Function, re: RegExp, obj: Object}} `re` matches "&" followed
 *   by any table key, `func` maps such a match to its replacement, `obj` is
 *   the merged table.
 */
var fetch = function(filename, inherits){
	var table = require("./entities/" + filename + ".json");

	if(inherits){
		for(var key in inherits){
			table[key] = inherits[key];
		}
	}

	var names = Object.keys(table);
	names.sort();
	//a key present both with and without ";" ("amp|amp;") collapses to "amp;?"
	var alternation = names.join("|").replace(/(\w+)\|\1;/g, "$1;?");

	return {
		func: function(match){
			//drop the leading "&" before looking the name up
			return table[match.substr(1)];
		},
		re: new RegExp("&(?:" + alternation + ")", "g"),
		obj: table
	};
};
/**
 * Builds the encoder counterpart of an entity table.
 *
 * @param {Object} obj - map of entity name -> replacement text.
 * @returns {{func: Function, re: RegExp}} `re` matches the replacement texts
 *   (each alternative prefixed with a backslash), `func` turns a matched text
 *   back into "&" + entity name.
 */
var getReverse = function(obj){
	//invert the table: replacement text -> entity name (later keys win)
	var byText = {};
	Object.keys(obj).forEach(function(entity){
		byText[obj[entity]] = entity;
	});

	var texts = Object.keys(byText);
	texts.sort();
	var pattern = "\\" + texts.join("|\\");

	return {
		func: function(text){
			return "&" + byText[text];
		},
		re: new RegExp(pattern, "g")
	};
};
var modes = ["XML", "HTML4", "HTML5"];
module.exports = {
decode: function(data, level){
if(!modes[level]) level = 0;
return module.exports["decode" +modes[level]](data);
},
encode: function(data, level){
if(!modes[level]) level = 0;
return module.exports["encode" +modes[level]](data);
}
};
//Table of the previously processed mode; handed to fetch() as `inherits`,
//so each mode's table also contains the entries of the modes before it.
var tmp;
//Creates module.exports.decode<Mode> and module.exports.encode<Mode> for
//every entry of `modes`, in order.
modes.forEach(function(name){
var obj = fetch(name.toLowerCase(), tmp),
regex = obj.re,
func = obj.func;
tmp = obj.obj;
//Decoding runs three passes: named entities, then hexadecimal references,
//then decimal references.
//NOTE(review): because the passes run one after another, text produced by an
//earlier pass can be decoded again by a later one (e.g. "&amp;#38;" becomes
//"&#38;" and then "&") - confirm whether that is intended.
module.exports["decode" +name] = function(data){
return data
.replace(regex, func)
.replace(re_hex, hex_func)
.replace(re_charCode, num_func);
};
var reverse = getReverse(obj.obj),
reverse_re = reverse.re,
reverse_func = reverse.func;
//Encoding runs two passes: texts that have a named entity first, then every
//character matched by re_notUTF8 as a decimal reference.
module.exports["encode" +name] = function(data){
return data
.replace(reverse_re, reverse_func)
.replace(re_notUTF8, charCode_func);
};
});

View File

@@ -0,0 +1,29 @@
{
"name": "entities",
"version": "0.2.1",
"description": "Encode & decode XML/HTML entities with ease",
"author": {
"name": "Felix Boehm",
"email": "me@feedic.com"
},
"keywords": [
"html",
"xml",
"entity",
"encoding"
],
"main": "./index.js",
"repository": {
"type": "git",
"url": "git://github.com/fb55/node-entities.git"
},
"license": "BSD-like",
"readme": "#entities\n\nEn- & decoder for XML/HTML entities.\n\n####Features:\n* Focussed on ___speed___\n* Supports three levels of entities: __XML__, __HTML4__ & __HTML5__\n * Supports _char code_ entities (eg. `&#x55;`)\n * Special optimizations for XML: A more restrictive syntax allows faster parsing\n\n##How to…\n\n###…install `entities`\n\n npm install entities\n\n###…use `entities`\n\n```javascript\n//encoding\nrequire(\"entities\").encode(<str> data[, <int> level]);\n//decoding\nrequire(\"entities\").decode(<str> data[, <int> level]);\n```\n\nThe `level` attribute indicates what level of entities should be decoded (0 = XML, 1 = HTML4 and 2 = HTML5). The default is 0 (read: XML).\n\nThere are also methods to access the level directly. Just append the name of the level to the action and you're ready to go (e.g. `encodeHTML4(data)`, `decodeXML(data)`).\n\n##TODO\n* There should be a way to remove tables that aren't used. The HTML5 table is pretty heavy, if it's not needed, it shouldn't be kept in memory.",
"readmeFilename": "readme.md",
"_id": "entities@0.2.1",
"dist": {
"shasum": "7aae886864887067f79f252a04c45309f4ac7980"
},
"_from": "entities@0.x",
"_resolved": "https://registry.npmjs.org/entities/-/entities-0.2.1.tgz"
}

31
node_modules/cheerio/node_modules/entities/readme.md generated vendored Normal file
View File

@@ -0,0 +1,31 @@
#entities
En- & decoder for XML/HTML entities.
####Features:
* Focussed on ___speed___
* Supports three levels of entities: __XML__, __HTML4__ & __HTML5__
* Supports _char code_ entities (eg. `&#x55;`)
* Special optimizations for XML: A more restrictive syntax allows faster parsing
##How to…
###…install `entities`
npm install entities
###…use `entities`
```javascript
//encoding
require("entities").encode(<str> data[, <int> level]);
//decoding
require("entities").decode(<str> data[, <int> level]);
```
The `level` argument indicates which level of entities should be encoded or decoded (0 = XML, 1 = HTML4 and 2 = HTML5). The default is 0 (read: XML).
There are also methods to access the level directly. Just append the name of the level to the action and you're ready to go (e.g. `encodeHTML4(data)`, `decodeXML(data)`).
##TODO
* There should be a way to remove tables that aren't used. The HTML5 table is pretty heavy; if it's not needed, it shouldn't be kept in memory.

23
node_modules/cheerio/node_modules/entities/test.js generated vendored Normal file
View File

@@ -0,0 +1,23 @@
//Smoke test plus micro benchmark: encodes a sample string, checks the exact
//output, decodes it again, checks the round trip, and times both directions
//with `ben`.
var ben = require("ben"),
decode = require("./").decodeXML,
encode = require("./").encode,
decoded = "asdf & ÿ ü '",
encoded = encode(decoded);
//encode() is called without a level here, so the expected output uses the
//XML entity names plus decimal references
(function(result){
if(result !== "asdf &amp; &#255; &#252; &apos;"){
throw Error("Unexpected output: " + result);
}
}(encode(decoded)));
//Array(201).join(x) repeats x 200 times to get a larger benchmark input
var tmp = Array(201).join(decoded);
console.log("Encoding:", ben(function(){ encode(tmp); }));
//round trip: decoding the encoded sample must give back the original
//NOTE(review): `decode` is bound to decodeXML, so the second argument (2)
//presumably has no effect - confirm against the module's decodeXML signature
(function(result){
if(result !== decoded){
throw Error("Unexpected output: " + result);
}
}(decode(encoded, 2)));
tmp = Array(201).join(encoded);
console.log("Decoding:", ben(function(){ decode(tmp, 2); }));

View File

@@ -0,0 +1,5 @@
language: node_js
node_js:
- 0.6
- 0.8
- 0.9

18
node_modules/cheerio/node_modules/htmlparser2/LICENSE generated vendored Normal file
View File

@@ -0,0 +1,18 @@
Copyright 2010, 2011, Chris Winberry <chris@winberry.net>. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE.

View File

@@ -0,0 +1,72 @@
#htmlparser2 [![Build Status](https://secure.travis-ci.org/fb55/node-htmlparser.png)](http://travis-ci.org/fb55/node-htmlparser)
A forgiving HTML/XML/RSS parser written in JS for NodeJS. The parser can handle streams (chunked data) and supports custom handlers for writing custom DOMs/output.
##Installing
npm install htmlparser2
##Usage
```javascript
var htmlparser = require("htmlparser2");
var parser = new htmlparser.Parser({
onopentag: function(name, attribs){
if(name === "script" && attribs.type === "text/javascript"){
console.log("JS! Hooray!");
}
},
ontext: function(text){
console.log("-->", text);
},
onclosetag: function(tagname){
if(tagname === "script"){
console.log("That's it?!");
}
}
});
parser.write("Xyz <script language= javascript>var foo = '<<bar>>';< / script>");
parser.done();
```
Output (simplified):
```javascript
--> Xyz
JS! Hooray!
--> var foo = '<<bar>>';
That's it?!
```
Read more about the parser in the [wiki](https://github.com/FB55/node-htmlparser/wiki/Parser-options).
##Get a DOM
The `DomHandler` (known as `DefaultHandler` in the original `htmlparser` module) produces a DOM (document object model) that can be manipulated using the `DomUtils` helper.
The `DomHandler`, while still bundled with this module, was recently moved to its [own module](https://github.com/FB55/domhandler). Have a look at it for further information.
##Parsing RSS/RDF/Atom Feeds
```javascript
new htmlparser.FeedHandler(function(<error> error, <object> feed){
...
});
```
##Performance
Using a slightly modified version of [node-expat](https://github.com/astro/node-expat)'s `bench.js`, I got the following results (on a late-2010 MacBook):
* [htmlparser](https://github.com/tautologistics/node-htmlparser): 51779 el/s
* [sax.js](https://github.com/isaacs/sax-js): 53169 el/s
* [node-expat](https://github.com/astro/node-expat): 103388 el/s
* [htmlparser2](https://github.com/fb55/node-htmlparser): 118614 el/s
The test may be found in `tests/bench.js`.
##How is this different from [node-htmlparser](https://github.com/tautologistics/node-htmlparser)?
This is a fork of the project above. The main difference is that this is intended to be used only with node (it runs on other platforms using [browserify](https://github.com/substack/node-browserify)). Besides, the code is much better structured, has less duplication and is remarkably faster than the original.
The parser now provides a callback interface close to [sax.js](https://github.com/isaacs/sax-js) (originally targeted at [readabilitySAX](https://github.com/fb55/readabilitysax)). I also fixed a couple of bugs & included some pull requests for the original project (eg. [RDF feed support](https://github.com/tautologistics/node-htmlparser/pull/35)).
The support for location data and verbose output was removed a couple of versions ago. It's still available in the [verbose branch](https://github.com/FB55/node-htmlparser/tree/verbose).
The `DefaultHandler` and the `RssHandler` were renamed to clarify their purpose (to `DomHandler` and `FeedHandler`). The old names are still available when requiring `htmlparser2`, so your code should work as expected.

View File

@@ -0,0 +1,10 @@
//Types of elements found in the DOM
//Each type is a distinct small integer; the trailing comment on every entry
//shows the markup the type stands for.
module.exports = {
Text: 0, //Text
Directive: 1, //<? ... ?>
Comment: 2, //<!-- ... -->
Script: 3, //<script> tags
Style: 4, //<style> tags
Tag: 5, //Any tag
CDATA: 6 //<![CDATA[ ... ]]>
};

View File

@@ -0,0 +1,87 @@
var index = require("./index.js"),
DomHandler = index.DomHandler,
DomUtils = index.DomUtils;
//TODO: make this a streamable handler
//Handler that turns a parsed RSS/RDF/Atom document into a plain feed object
//(see FeedHandler.prototype.onend).
//callback: passed on unchanged to the DomHandler constructor (via init),
//together with the option { ignoreWhitespace: true }.
function FeedHandler(callback){
this.init(callback, { ignoreWhitespace: true });
}
require("util").inherits(FeedHandler, DomHandler);
//`init` is the DomHandler constructor itself, so calling this.init(...) in
//the constructor above runs DomHandler's initialisation on this instance
FeedHandler.prototype.init = DomHandler;
/**
 * Thin wrapper around DomUtils.getElementsByTagName.
 *
 * @param what - tag name (or test function) forwarded to DomUtils.
 * @param where - node(s) to search in.
 * @param {boolean} [one] - when truthy, search with a limit of 1 and return
 *   the first match (undefined if there is none) instead of the result list.
 * @param [recurse] - forwarded unchanged to DomUtils.
 */
function getElements(what, where, one, recurse){
	if(!one){
		return DomUtils.getElementsByTagName(what, where, recurse);
	}
	var matches = DomUtils.getElementsByTagName(what, where, recurse, 1);
	return matches[0];
}
/**
 * Returns the `data` of the first child of the first element matching `what`.
 *
 * @param what - tag name forwarded to DomUtils.getElementsByTagName.
 * @param where - node(s) to search in.
 * @param [recurse] - forwarded unchanged to DomUtils.
 * @returns the first child's `data`, or false when nothing matched or the
 *   matched element has no children.
 */
function fetch(what, where, recurse){
	var found = DomUtils.getElementsByTagName(what, where, recurse, 1);
	if(!(found.length > 0)){
		return false;
	}
	if(!(found[0].children.length > 0)){
		return false;
	}
	return found[0].children[0].data;
}
/**
 * Tells whether a tag name is one of the supported feed root elements.
 *
 * @param value - tag name to test (compared strictly and case-sensitively).
 * @returns {boolean} true for "rss", "feed" and "rdf:RDF".
 */
var isValidFeed = function(value) {
	switch(value){
		case "rss":
		case "feed":
		case "rdf:RDF":
			return true;
		default:
			return false;
	}
};
//Called when parsing has finished. Looks for a feed root element ("rss",
//"feed" or "rdf:RDF") in this.dom, extracts the feed's properties into a plain
//object, replaces this.dom with that object and invokes DomHandler's
//_handleCallback - with an Error if no feed root was found.
//Properties are only set on the result when a value was found.
FeedHandler.prototype.onend = function() {
var feed = {},
feedRoot = getElements(isValidFeed, this.dom, true),
tmp, childs;
if (feedRoot) {
if(feedRoot.name === "feed"){
//Atom: properties are direct children of <feed>
childs = feedRoot.children;
feed.type = "atom";
if(tmp = fetch("id", childs)) feed.id = tmp;
if(tmp = fetch("title", childs)) feed.title = tmp;
//the link is taken from the href attribute, not from the element's text
if((tmp = getElements("link", childs, true)) && (tmp = tmp.attribs) && (tmp = tmp.href)) feed.link = tmp;
if(tmp = fetch("subtitle", childs)) feed.description = tmp;
if(tmp = fetch("updated", childs)) feed.updated = new Date(tmp);
//the only lookup here that passes recurse = true
if(tmp = fetch("email", childs, true)) feed.author = tmp;
feed.items = getElements("entry", childs).map(function(item){
var entry = {}, tmp;
item = item.children;
if(tmp = fetch("id", item)) entry.id = tmp;
if(tmp = fetch("title", item)) entry.title = tmp;
if((tmp = getElements("link", item, true)) && (tmp = tmp.attribs) && (tmp = tmp.href)) entry.link = tmp;
if(tmp = fetch("summary", item)) entry.description = tmp;
if(tmp = fetch("updated", item)) entry.pubDate = new Date(tmp);
return entry;
});
} else{
//RSS / RDF: feed level properties are children of <channel>
//NOTE(review): getElements(..., true) returns undefined when there is no
//match, so a root without a <channel> would throw here - confirm whether
//that case needs a guard
childs = getElements("channel", feedRoot.children, true).children;
//first three characters of the root name: "rss" or "rdf"
feed.type = feedRoot.name.substr(0, 3);
feed.id = "";
if(tmp = fetch("title", childs)) feed.title = tmp;
if(tmp = fetch("link", childs)) feed.link = tmp;
if(tmp = fetch("description", childs)) feed.description = tmp;
if(tmp = fetch("lastBuildDate", childs)) feed.updated = new Date(tmp);
if(tmp = fetch("managingEditor", childs)) feed.author = tmp;
//items are searched from the root's children, not from the channel's
feed.items = getElements("item", feedRoot.children).map(function(item){
var entry = {}, tmp;
item = item.children;
if(tmp = fetch("guid", item)) entry.id = tmp;
if(tmp = fetch("title", item)) entry.title = tmp;
if(tmp = fetch("link", item)) entry.link = tmp;
if(tmp = fetch("description", item)) entry.description = tmp;
if(tmp = fetch("pubDate", item)) entry.pubDate = new Date(tmp);
return entry;
});
}
}
//the parsed DOM is replaced by the feed object (an empty object if no root
//was found)
this.dom = feed;
DomHandler.prototype._handleCallback.call(
this, feedRoot ? null : Error("couldn't find root of feed")
);
};
module.exports = FeedHandler;

View File

@@ -0,0 +1,397 @@
var ElementType = require("./ElementType.js");
/**
 * Streaming HTML/XML tokenizer that reports what it finds through callbacks.
 *
 * @param {Object} [cbs] - callback object (onopentag, ontext, ...); the shared
 *   empty default is used when omitted.
 * @param {Object} [options] - parser options; the shared defaults are used
 *   when omitted.
 */
function Parser(cbs, options){
	this._options = options ? options : defaultOpts;
	this._cbs = cbs ? cbs : defaultCbs;

	this._buffer = "";          //input that has not been consumed yet
	this._tagSep = ">";         //separator ("<" or ">") that ended the last chunk
	this._stack = [];           //names of the currently open tags
	this._wroteSpecial = false;
	this._contentFlags = 0;     //bit flags for special content (see SpecialTags)
	this._done = false;
	this._running = true;       //false if paused
}
//Regular expressions used for cleaning up and parsing (stateless)
/* http://dev.w3.org/html5/html-author/#attributes
* - Whitespace is permitted after the tag name, but it is not permitted before the tag name.
* - Attribute names must consist of one or more characters other than the space characters,
* control characters, NULL, one of the characters: double quote ("), single quote ('),
* greater-than sign (>), solidus (/), equals sign (=), nor any characters that are not defined by Unicode.
* - An empty attribute is one where the value has been omitted. (<input disabled>...</input>
* - An unquoted attribute value must not contain any literal space characters, any of the characters:
* double quote ("), apostrophe ('), equals sign (=), less-than sign (<), greater-than sign (>),
* or grave accent (`), and the value must not be the empty string.
* - There may be space characters between the attribute name and the equals sign (=),
* and between that and the attribute value.
* - Double-quoted attributes must not contain any double-quote characters or ambiguous ampersands.
* - Single-quoted attributes must not contain any single-quote characters or ambiguous ampersands.
*/
// element name: (<[^<& ]+)
// attribute name: ( [^"'=>\/]+)
// attribute value: (\s*=\s*(?:
// "([^"]*)"|
// '([^']*)'|
// [^\s"'=<>`]+)
// tag end: (?=\s|\/|$)
//_reAttrib matches one attribute (including its leading whitespace).
//Capture groups: 1 = name, 2 = double-quoted value, 3 = single-quoted value,
//4 = unquoted value; groups 2-4 are all unset for a value-less attribute.
//The regex is global, so exec() keeps its position in lastIndex between calls.
//_reTail matches the first whitespace, "/" or end of input, i.e. the end of a
//tag name.
var _reAttrib = /\s+([^"'=>\/\s]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+))|(?=\s)|\/|$)/g,
_reTail = /\s|\/|$/;
var defaultOpts = {
xmlMode: false, //Special behavior for script/style tags by default
lowerCaseAttributeNames: false, //call .toLowerCase for each attribute name
lowerCaseTags: false //call .toLowerCase for each tag name
};
var defaultCbs = {
/*
This is just a plain object
so that the parser doesn't
throw if no arguments were
provided.
*/
/*
oncdataend,
oncdatastart,
onclosetag,
oncomment,
oncommentend,
onerror,
onopentag,
onopentagend,
onprocessinginstruction,
onreset,
ontext
*/
};
//Both tables below are probed with the `in` operator using tag names taken
//from the parsed document. They therefore get a null prototype (like
//emptyTags), so that names such as "constructor" or "toString" do not match
//properties inherited from Object.prototype.

//Form related tags; shared as the "closes" set of several entries below.
var formTags = {
	__proto__: null,
	input: true,
	option: true,
	optgroup: true,
	select: true,
	button: true,
	datalist: true,
	textarea: true
};

//Maps a tag name to the set of tags that are closed implicitly when that tag
//is opened while one of them is on top of the stack.
var openImpliesClose = {
	__proto__: null,
	tr      : { __proto__: null, tr:true, th:true, td:true },
	th      : { __proto__: null, th:true },
	td      : { __proto__: null, thead:true, td:true },
	body    : { __proto__: null, head:true, link:true, script:true },
	li      : { __proto__: null, li:true },
	p       : { __proto__: null, p:true },
	select  : formTags,
	input   : formTags,
	output  : formTags,
	button  : formTags,
	datalist: formTags,
	textarea: formTags,
	option  : { __proto__: null, option:true },
	optgroup: { __proto__: null, optgroup:true }
};
//Parses a complete HTML document in one go: resets the parser first, then
//feeds `data` through end(), which also finishes parsing
Parser.prototype.parseComplete = function(data){
this.reset();
this.end(data);
};
//Parses a piece of an HTML document (also available as parseChunk).
//The chunk is appended to the internal buffer; it is parsed right away unless
//the parser is paused.
//NOTE(review): after end() an error is reported through _handleError, but if
//an onerror callback is set (so nothing is thrown) the chunk is still
//buffered and parsed - confirm whether that is intended.
Parser.prototype.parseChunk =
Parser.prototype.write = function(data){
if(this._done) this._handleError("Attempted to parse chunk after parsing already done");
this._buffer += data; //FIXME: this can be a bottleneck
if(this._running) this._parseTags();
};
//Tells the parser that the HTML being parsed is complete (also available as
//done). An optional last chunk is written first. Calling it again is a no-op.
//While paused, only the done flag is set; resume() finishes parsing later.
Parser.prototype.done =
Parser.prototype.end = function(chunk){
if(this._done) return;
if(chunk) this.write(chunk);
this._done = true;
if(this._running) this._finishParsing();
};
//Flushes whatever is left in the buffer, reports a closing tag for every tag
//still on the stack (innermost first) and finally calls onend.
//Note that the stack is only emptied when an onclosetag callback exists.
Parser.prototype._finishParsing = function(){
//Parse the buffer to its end
if(this._buffer) this._parseTags(true);
if(this._cbs.onclosetag){
while(this._stack.length) this._cbs.onclosetag(this._stack.pop());
}
if(this._cbs.onend) this._cbs.onend();
};
//Pauses parsing: chunks written while paused are only buffered.
//Has no effect once end() has been called.
Parser.prototype.pause = function(){
if(!this._done) this._running = false;
};
//Resumes a paused parser: parses what was buffered in the meantime and, if
//end() was called while paused, finishes parsing. No-op when not paused.
Parser.prototype.resume = function(){
if(this._running) return;
this._running = true;
this._parseTags();
if(this._done) this._finishParsing();
};
//Resets the parser to a blank state, ready to parse a new HTML document.
//Re-runs the constructor on this instance with the current callbacks and
//options, then notifies the handler through onreset.
Parser.prototype.reset = function(){
Parser.call(this, this._cbs, this._options);
if(this._cbs.onreset) this._cbs.onreset();
};
//Extracts the tag name from the raw content of a tag: everything up to the
//first whitespace, "/" or the end of the string. The name is lower-cased when
//the lowerCaseTags option is set.
Parser.prototype._parseTagName = function(data){
	var nameEnd = data.search(_reTail);
	var tagName = data.substr(0, nameEnd);
	return this._options.lowerCaseTags ? tagName.toLowerCase() : tagName;
};
//Special tags that are treated differently
//Maps an element type to the bit that is set in Parser#_contentFlags while
//the parser is inside content of that type. The parser compares the flags
//with >=, so the order of the values (CDATA > Comment > Script > Style)
//matters.
var SpecialTags = {};
//SpecialTags[ElementType.Tag] = 0x0;
SpecialTags[ElementType.Style] = 0x1; //2^0
SpecialTags[ElementType.Script] = 0x2; //2^1
SpecialTags[ElementType.Comment] = 0x4; //2^2
SpecialTags[ElementType.CDATA] = 0x8; //2^3
//Flag bit per tag name; used to clear the matching flag when the closing
//tag of a style/script element is found
var TagValues = {
style: 1,
script: 2
};
//Splits the buffered text at every "<" and ">" and dispatches each piece to
//the matching handler (text, tag, comment, CDATA, ...). Nothing is returned;
//results are reported through the callbacks. The part of the buffer that was
//not consumed is kept for the next call.
//force: when truthy, the rest of the buffer is consumed as well, even if it
//is not terminated by another separator (used when parsing is finished).
Parser.prototype._parseTags = function(force){
var current = 0,
opening = this._buffer.indexOf("<"),
closing = this._buffer.indexOf(">"),
next, rawData, elementData, lastTagSep;
//if force is true, parse everything
if(force) opening = Infinity;
//opening !== closing is just false if both are -1
while(opening !== closing && this._running){
//separator that preceded this chunk; "<" means the chunk is tag content
lastTagSep = this._tagSep;
//pick whichever separator comes first and look up its next occurrence
if((opening !== -1 && opening < closing) || closing === -1){
next = opening;
this._tagSep = "<";
opening = this._buffer.indexOf("<", next + 1);
}
else{
next = closing;
this._tagSep = ">";
closing = this._buffer.indexOf(">", next + 1);
}
rawData = this._buffer.substring(current, next); //The next chunk of data to parse
//set elements for next run
current = next + 1;
if(this._contentFlags >= SpecialTags[ElementType.CDATA]){
// We're inside a CDATA section
this._writeCDATA(rawData);
}
else if(this._contentFlags >= SpecialTags[ElementType.Comment]){
//We're in a comment tag
this._writeComment(rawData);
}
else if(lastTagSep === "<"){
elementData = rawData.trimLeft();
if(elementData.charAt(0) === "/"){
//elementData = elementData.substr(1).trim();
elementData = this._parseTagName(elementData.substr(1));
if(this._contentFlags !== 0){
//if it's a closing tag, remove the flag
if(this._contentFlags & TagValues[elementData]){
//remove the flag
this._contentFlags ^= TagValues[elementData];
} else {
//a closing tag that doesn't end the current script/style: plain text
this._writeSpecial(rawData, lastTagSep);
continue;
}
}
this._processCloseTag(elementData);
}
//inside script/style everything else is emitted as text
else if(this._contentFlags !== 0) this._writeSpecial(rawData, lastTagSep);
else if(elementData.charAt(0) === "!"){
if(elementData.substr(1, 7) === "[CDATA["){
this._contentFlags |= SpecialTags[ElementType.CDATA];
if(this._cbs.oncdatastart) this._cbs.oncdatastart();
this._writeCDATA(elementData.substr(8));
}
//NOTE(review): _contentFlags was already found to be 0 by the enclosing
//else-if chain, so this branch looks unreachable - confirm
else if(this._contentFlags !== 0) this._writeSpecial(rawData, lastTagSep);
else if(elementData.substr(1, 2) === "--"){
//This tag is a comment
this._contentFlags |= SpecialTags[ElementType.Comment];
this._writeComment(rawData.substr(3));
}
//TODO: This isn't a processing instruction, needs a new name
else if(this._cbs.onprocessinginstruction){
this._cbs.onprocessinginstruction(
"!" + this._parseTagName(elementData.substr(1)),
elementData
);
}
}
else if(elementData.charAt(0) === "?"){
if(this._cbs.onprocessinginstruction){
this._cbs.onprocessinginstruction(
"?" + this._parseTagName(elementData.substr(1)),
elementData
);
}
}
else this._processOpenTag(elementData);
}
else{
//the chunk followed a ">", so it is content between tags
if(this._contentFlags !== 0){
this._writeSpecial(rawData, ">");
}
else if(this._cbs.ontext){
if(this._tagSep === ">") rawData += ">"; //it's the second > in a row
if(rawData !== "") this._cbs.ontext(rawData);
}
}
}
//drop everything that was consumed; the remainder waits for more input
this._buffer = this._buffer.substr(current);
};
//Handles a chunk inside a CDATA section. A chunk that ends with "]]" and was
//terminated by ">" closes the section: the text before "]]" (if any) is
//emitted, the CDATA flag is cleared and oncdataend is called. Any other chunk
//is emitted as text together with the separator that ended it.
Parser.prototype._writeCDATA = function(data){
if(this._tagSep === ">" && data.substr(-2) === "]]"){
// CDATA ends
if(data.length !== 2 && this._cbs.ontext){
this._cbs.ontext(data.slice(0,-2));
}
this._contentFlags ^= SpecialTags[ElementType.CDATA];
if(this._cbs.oncdataend) this._cbs.oncdataend();
this._wroteSpecial = false;
}
else if(this._cbs.ontext) this._cbs.ontext(data + this._tagSep);
};
//Handles a chunk inside a comment. A chunk that ends with "--" and was
//terminated by ">" closes the comment: the text before "--" is reported via
//oncomment, followed by oncommentend. Any other chunk is reported via
//oncomment together with the separator that ended it, so a single comment
//may be delivered in several oncomment calls.
Parser.prototype._writeComment = function(rawData){
if(this._tagSep === ">" && rawData.substr(-2) === "--"){ //comment ends
//clear the comment flag and reset the special-text marker
this._contentFlags ^= SpecialTags[ElementType.Comment];
this._wroteSpecial = false;
if(this._cbs.oncomment) this._cbs.oncomment(rawData.slice(0, -2));
if(this._cbs.oncommentend) this._cbs.oncommentend();
}
else if(this._cbs.oncomment) this._cbs.oncomment(rawData + this._tagSep);
};
//Emits a chunk of script/style content as text. The first chunk is emitted as
//is (and skipped when empty); every following chunk is prefixed with the
//separator that was consumed in front of it, so "<" and ">" inside the
//content are not lost.
Parser.prototype._writeSpecial = function(rawData, lastTagSep){
//if the previous element is text, append the last tag sep to element
if(this._wroteSpecial){
if(this._cbs.ontext) this._cbs.ontext(lastTagSep + rawData);
}
else{ //The previous element was not text
this._wroteSpecial = true;
if(rawData !== "" && this._cbs.ontext) this._cbs.ontext(rawData);
}
};
//Tags that never have content; outside of xmlMode they are closed as soon as
//they are opened and explicit closing tags for them are ignored.
//The null prototype keeps `name in emptyTags` from matching inherited
//properties such as "constructor".
var emptyTags = {
__proto__: null,
area: true,
base: true,
basefont: true,
br: true,
col: true,
frame: true,
hr: true,
img: true,
input: true,
isindex: true,
link: true,
meta: true,
param: true,
embed: true
};
//Handles a closing tag. The innermost open tag with that name is closed
//together with everything opened after it; a closing tag without a matching
//open tag is ignored. Closing tags of void elements are ignored outside of
//xmlMode, except for </br>, which is treated like <br/>.
Parser.prototype._processCloseTag = function(name){
	var closable = this._stack && (!(name in emptyTags) || this._options.xmlMode);

	if(!closable){
		//many browsers (eg. Safari, Chrome) convert </br> to <br>
		if(name === "br" && !this._options.xmlMode){
			this._processOpenTag(name + "/");
		}
		return;
	}

	var pos = this._stack.lastIndexOf(name);
	if(pos === -1){
		return;
	}

	if(!this._cbs.onclosetag){
		//nobody to notify: just drop the tag and everything above it
		this._stack.splice(pos);
		return;
	}

	//report every tag from the top of the stack down to the matching one
	var pending = this._stack.length - pos;
	while(pending--){
		this._cbs.onclosetag(this._stack.pop());
	}
};
//Reports every attribute found in the raw tag content through onattribute.
//data: raw content of the opening tag; lcNames: lower-case the names if truthy.
//Attributes without a value are reported with an empty string.
Parser.prototype._parseAttributes = function(data, lcNames){
	var hit;
	while((hit = _reAttrib.exec(data)) !== null){
		var attrName = lcNames ? hit[1].toLowerCase() : hit[1];
		var attrValue = hit[2] || hit[3] || hit[4] || "";
		this._cbs.onattribute(attrName, attrValue);
	}
};
//Parses the attributes in the raw content of an opening tag into a plain
//object (name -> value). Names are lower-cased when lcNames is truthy; an
//attribute without a value maps to an empty string; a repeated name keeps the
//last value.
var parseAttributes = function(data, lcNames){
	var result = {};
	var hit;
	while((hit = _reAttrib.exec(data)) !== null){
		var attrName = lcNames ? hit[1].toLowerCase() : hit[1];
		result[attrName] = hit[2] || hit[3] || hit[4] || "";
	}
	return result;
};
//Handles an opening tag. `data` is the raw content between "<" and ">".
//Order of events: implied closing tags (outside xmlMode), onopentagname,
//onopentag (with all attributes), onattribute per attribute, then either an
//immediate onclosetag (void or self-closing tag) or a push onto the stack.
Parser.prototype._processOpenTag = function(data){
var name = this._parseTagName(data),
attributes = parseAttributes(data, this._options.lowerCaseAttributeNames),
type = ElementType.Tag;
//script and style get their own type, so that their content is treated as
//text; in xmlMode they are ordinary tags
if(this._options.xmlMode){ /*do nothing*/ }
else if(name === "script") type = ElementType.Script;
else if(name === "style") type = ElementType.Style;
//close every tag on top of the stack that the new tag implicitly ends
if (!this._options.xmlMode && name in openImpliesClose) {
var el;
while ((el = this._stack[this._stack.length-1]) in openImpliesClose[name]) {
this._processCloseTag(el);
}
}
if(this._cbs.onopentagname) this._cbs.onopentagname(name);
if(this._cbs.onopentag) this._cbs.onopentag(name, attributes);
if(this._cbs.onattribute){
this._parseAttributes(data, this._options.lowerCaseAttributeNames);
}
//If tag self-terminates, add an explicit, separate closing tag
/* http://dev.w3.org/html5/html-author/#tags
* In XHTML, self-closing tags are valid but attribute values must be quoted.
* In HTML, self-closing tags must be either void elements or foreign elements.
* Invalid HTML self-closing tag syntax is ignored (treated as an opening tag).
* Foreign elements use XML rules
*/
//the second test strips all attributes first, so that a "/" that is part of
//an attribute is not mistaken for the self-closing marker
if((!this._options.xmlMode && name in emptyTags) || (data.substr(-1) === "/" && data.replace(_reAttrib, "").substr(-1) === "/")){
if(this._cbs.onclosetag) this._cbs.onclosetag(name);
} else {
if(type !== ElementType.Tag){
//entering script/style content
this._contentFlags |= SpecialTags[type];
this._wroteSpecial = false;
}
this._stack.push(name);
}
};
//Wraps the message in an Error and hands it to the onerror callback; when no
//such callback is set, the Error is thrown instead.
Parser.prototype._handleError = function(error){
	var wrapped = new Error(error);
	if(!this._cbs.onerror){
		throw wrapped;
	}
	this._cbs.onerror(wrapped);
};
module.exports = Parser;

View File

@@ -0,0 +1,19 @@
//Handler that forwards every parser event to another callback object.
//cbs: object holding the actual callbacks; when omitted, the (shared, empty)
//object on the prototype is used, so no event has a handler.
var ProxyHandler = function(cbs){
	if(cbs) this._cbs = cbs;
};

ProxyHandler.prototype._cbs = {};

//Define an accessor for every parser event: reading `handler.on<event>`
//returns the wrapped object's callback of the same name.
Object.keys(require("./").EVENTS).forEach(function(name){
	name = "on" + name;
	Object.defineProperty(ProxyHandler.prototype, name, {
		enumerable:true, configurable:true,
		get: function(){ return this._cbs[name]; },
		set: function(value){
			//allow functions to be overwritten
			//The own property has to be writable and configurable; with the
			//defineProperty defaults (both false) only the first assignment
			//would take effect and later ones would be ignored (or throw in
			//strict mode).
			Object.defineProperty(this, name, {
				value: value,
				writable: true,
				configurable: true
			});
		}
	});
});
module.exports = ProxyHandler;

View File

@@ -0,0 +1,35 @@
var WritableStream = require("./WritableStream.js");
//Stream that re-emits every parser event as an event of the same name
//(without the "on" prefix) on itself.
var Stream = function(options){
	WritableStream.call(this, new cbs(this), options);
};

require("util").inherits(Stream, WritableStream);

Stream.prototype.readable = true;

//Callback object handed to the parser; `scope` is the stream to emit on.
var cbs = function(scope){
	this.scope = scope;
};

var EVENTS = require("../").EVENTS;

//Create one forwarding callback per event, taking exactly as many arguments
//as the event table declares.
Object.keys(EVENTS).forEach(function(name){
	var forward;

	switch(EVENTS[name]){
		case 0:
			forward = function(){
				this.scope.emit(name);
			};
			break;
		case 1:
			forward = function(a){
				this.scope.emit(name, a);
			};
			break;
		case 2:
			forward = function(a, b){
				this.scope.emit(name, a, b);
			};
			break;
		default:
			throw Error("wrong number of arguments!");
	}

	cbs.prototype["on" + name] = forward;
});
module.exports = Stream;

View File

@@ -0,0 +1,19 @@
var Parser = require("./Parser.js");
//Parser that also inherits from the core Stream class and is flagged writable
var WritableStream = function(cbs, options){
	Parser.call(this, cbs, options);
};
require("util").inherits(WritableStream, require("stream").Stream);
//util.inherits would overwrite the prototype when called twice,
//so the parser's own prototype properties are copied over one by one
var parserProto = Parser.prototype;
var parserProps = Object.getOwnPropertyNames(parserProto);
for(var idx = 0; idx < parserProps.length; idx++){
	WritableStream.prototype[parserProps[idx]] = parserProto[parserProps[idx]];
}
WritableStream.prototype.writable = true;
// TODO improve support for Parser#pause and Parser#continue
module.exports = WritableStream;

View File

@@ -0,0 +1,60 @@
//Public entry point of the package.
//Every sub-module is exposed through a getter that loads it on first access
//and then redefines the property on the exports object as a plain value, so
//the require() call runs at most once per property.
var defineProp = Object.defineProperty;
module.exports = {
get Parser(){
defineProp(this, "Parser", {value:require("./Parser.js")});
return this.Parser;
},
get DomHandler(){
defineProp(this, "DomHandler", {value:require("domhandler")});
return this.DomHandler;
},
get FeedHandler(){
defineProp(this, "FeedHandler", {value:require("./FeedHandler.js")});
return this.FeedHandler;
},
get ElementType(){
defineProp(this, "ElementType", {value:require("domelementtype")});
return this.ElementType;
},
get Stream(){
defineProp(this, "Stream", {value:require("./Stream.js")});
return this.Stream;
},
get WritableStream(){
defineProp(this, "WritableStream", {value:require("./WritableStream.js")});
return this.WritableStream;
},
get ProxyHandler(){
defineProp(this, "ProxyHandler", {value:require("./ProxyHandler.js")});
return this.ProxyHandler;
},
get DomUtils(){
defineProp(this, "DomUtils", {value:require("domutils")});
return this.DomUtils;
},
// For legacy support
//DefaultHandler is an alias of DomHandler
get DefaultHandler(){
defineProp(this, "DefaultHandler", {value: this.DomHandler});
return this.DefaultHandler;
},
//RssHandler is an alias of FeedHandler (the getter returns the same value
//it has just stored under "RssHandler")
get RssHandler(){
defineProp(this, "RssHandler", {value: this.FeedHandler});
return this.FeedHandler;
},
// List of all events that the parser emits
EVENTS: { /* Format: eventname: number of arguments */
attribute: 2,
cdatastart: 0,
cdataend: 0,
text: 1,
processinginstruction: 2,
comment: 1,
commentend: 0,
closetag: 1,
opentag: 2,
opentagname: 1,
error: 1,
end: 0
}
}

View File

@@ -0,0 +1,11 @@
Copyright (c) Felix Böhm
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
THIS IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS,
EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@@ -0,0 +1,14 @@
//Types of elements found in the DOM
//Each constant is the string stored in a node's `type` property.
module.exports = {
Text: "text", //Text
Directive: "directive", //<? ... ?>
Comment: "comment", //<!-- ... -->
Script: "script", //<script> tags
Style: "style", //<style> tags
Tag: "tag", //Any tag
CDATA: "cdata", //<![CDATA[ ... ]]>
//Returns true when `elem.type` is "tag", "script" or "style"
isTag: function(elem){
return elem.type === "tag" || elem.type === "script" || elem.type === "style";
}
};

View File

@@ -0,0 +1,26 @@
{
"name": "domelementtype",
"version": "1.1.1",
"description": "all the types of nodes in htmlparser2's dom",
"main": "index.js",
"repository": {
"type": "git",
"url": "git://github.com/FB55/domelementtype.git"
},
"keywords": [
"dom",
"htmlparser2"
],
"author": {
"name": "Felix Boehm",
"email": "me@feedic.com"
},
"readme": "all the types of nodes in htmlparser2's dom\n",
"readmeFilename": "readme.md",
"_id": "domelementtype@1.1.1",
"dist": {
"shasum": "7887acbda7614bb0a3dbe1b5e394f77a8ed297cf"
},
"_from": "domelementtype@1",
"_resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-1.1.1.tgz"
}

View File

@@ -0,0 +1 @@
all the types of nodes in htmlparser2's dom

View File

@@ -0,0 +1,6 @@
language: node_js
node_js:
- 0.4
- 0.6
- 0.8
- 0.9

View File

@@ -0,0 +1,159 @@
var ElementType = require("domelementtype");
/**
 * Handler that collects the parser's events into a tree stored in `this.dom`.
 *
 * Accepted call shapes:
 *   new DomHandler(callback, options, elementCB)
 *   new DomHandler(callback, elementCB)
 *   new DomHandler(options, elementCB)
 *
 * @param {Function|Object} [callback] called as callback(error, dom); an
 *   object in this position is taken as `options` instead
 * @param {Object|Function} [options] handler options; a function in this
 *   position is taken as `elementCB` instead
 * @param {Function} [elementCB] called with every element once it is closed
 */
function DomHandler(callback, options, elementCB){
	//`typeof null` is "object" as well. A null callback is exactly what
	//onreset passes for handlers that were created without one, so it must
	//not be mistaken for an options object: that would shift the arguments,
	//drop the options and store the options object as elementCB.
	if(callback !== null && typeof callback === "object"){
		elementCB = options;
		options = callback;
		callback = null;
	} else if(typeof options === "function"){
		elementCB = options;
		options = defaultOpts;
	}
	this._callback = callback;
	this._options = options || defaultOpts;
	this._elementCB = elementCB;
	this.dom = []; //top-level nodes
	this._done = false; //set once onend has run
	this._tagStack = []; //currently open elements, innermost last
}
//default options, used by the constructor whenever no options object is given
var defaultOpts = {
ignoreWhitespace: false //Keep whitespace-only text nodes
};
//Puts the handler back into its initial state by re-running the constructor
//on this instance with the stored callback, options and element callback
DomHandler.prototype.onreset = function(){
	DomHandler.apply(this, [this._callback, this._options, this._elementCB]);
};
//Called when parsing has finished; reports the result exactly once
DomHandler.prototype.onend = function(){
	if(!this._done){
		this._done = true;
		this._handleCallback(null);
	}
};
//Shared by _handleCallback and onerror: hands (error, dom) to the user
//callback; without a callback function a truthy error is thrown instead
DomHandler.prototype._handleCallback =
DomHandler.prototype.onerror = function(error){
	if(typeof this._callback !== "function"){
		if(error) throw error;
		return;
	}
	this._callback(error, this.dom);
};
//Pops the innermost open element and passes it to the element callback, if
//there is one. `name` is not compared with the popped element's name.
DomHandler.prototype.onclosetag = function(name){
	var closed = this._tagStack.pop();
	if(!this._elementCB) return;
	this._elementCB(closed);
};
//Appends `element` to the children of the innermost open element, or to the
//top level of the dom when nothing is open
DomHandler.prototype._addDomElement = function(element){
	var parent = this._tagStack[this._tagStack.length - 1];
	var target = parent ? parent.children : this.dom;
	target.push(element);
};
//Creates the node for an opening tag, links it to its parent and to the
//closest preceding sibling that is a tag, and pushes it on the stack of
//open elements
DomHandler.prototype.onopentag = function(name, attribs){
	var parent = this._tagStack[this._tagStack.length - 1];
	var type;
	if(name === "script") type = ElementType.Script;
	else if(name === "style") type = ElementType.Style;
	else type = ElementType.Tag;
	var element = {
		type: type,
		name: name,
		attribs: attribs,
		children: [],
		prev: null,
		next: null,
		parent: parent || null
	};
	if(!parent){
		this.dom.push(element);
	} else {
		var siblings = parent.children;
		//walk backwards to the nearest tag sibling; other nodes are skipped
		for(var pos = siblings.length - 1; pos >= 0; pos--){
			if(!ElementType.isTag(siblings[pos])) continue;
			element.prev = siblings[pos];
			siblings[pos].next = element;
			break;
		}
		siblings.push(element);
	}
	this._tagStack.push(element);
};
//Adds text to the tree. Whitespace-only text is dropped when the
//ignoreWhitespace option is set; text that directly follows a text node
//inside the innermost open element is appended to that node.
DomHandler.prototype.ontext = function(data){
	if(this._options.ignoreWhitespace && data.trim() === "") return;
	var parent = this._tagStack[this._tagStack.length - 1];
	var previous = parent ? parent.children[parent.children.length - 1] : undefined;
	if(previous && previous.type === ElementType.Text){
		previous.data += data;
	} else {
		this._addDomElement({
			data: data,
			type: ElementType.Text
		});
	}
};
//Adds comment data: it extends the comment on top of the stack if there is
//one, otherwise a new comment node is created and pushed on the stack
DomHandler.prototype.oncomment = function(data){
	var top = this._tagStack[this._tagStack.length - 1];
	if(top && top.type === ElementType.Comment){
		top.data += data;
	} else {
		var comment = {
			data: data,
			type: ElementType.Comment
		};
		this._addDomElement(comment);
		this._tagStack.push(comment);
	}
};
//TODO remove duplicated code
//Adds CDATA content: it extends the CDATA node on top of the stack if there
//is one, otherwise a new CDATA node is created and pushed on the stack
DomHandler.prototype.oncdata = function(data){
	var top = this._tagStack[this._tagStack.length - 1];
	if(top && top.type === ElementType.CDATA){
		top.data += data;
	} else {
		var section = {
			data: data,
			type: ElementType.CDATA
		};
		this._addDomElement(section);
		this._tagStack.push(section);
	}
};
//End of a comment or CDATA section: remove the top entry from the stack.
//Both events share one function.
DomHandler.prototype.oncdataend = function(){
	this._tagStack.pop();
};
DomHandler.prototype.oncommentend = DomHandler.prototype.oncdataend;
//Adds a directive node carrying the instruction's name and raw data
DomHandler.prototype.onprocessinginstruction = function(name, data){
	var directive = {
		name: name,
		data: data,
		type: ElementType.Directive
	};
	this._addDomElement(directive);
};
module.exports = DomHandler;

View File

@@ -0,0 +1,38 @@
{
"name": "domhandler",
"version": "2.0.2",
"description": "htmlparser2's dom as a separate module",
"main": "index.js",
"directories": {
"test": "tests"
},
"scripts": {
"test": "node runtests.js"
},
"repository": {
"type": "git",
"url": "git://github.com/FB55/domhandler.git"
},
"keywords": [
"dom",
"htmlparser2"
],
"dependencies": {
"domelementtype": "1"
},
"devDependencies": {
"htmlparser2": "2.3"
},
"author": {
"name": "Felix Boehm",
"email": "me@feedic.com"
},
"readme": "#DOMHandler [![Build Status](https://secure.travis-ci.org/fb55/DomHandler.png)](http://travis-ci.org/fb55/DomHandler)\n\nThe DOM handler (formally known as DefaultHandler) creates a tree containing all nodes of a page. The tree may be manipulated using the DOMUtils library.\n\n##Usage\n```javascript\nvar handler = new DomHandler([ <func> callback(err, dom), ] [ <obj> options ]);\n// var parser = new Parser(handler[, options]);\n```\n\n##Example\n```javascript\nvar htmlparser = require(\"htmlparser2\");\nvar rawHtml = \"Xyz <script language= javascript>var foo = '<<bar>>';< / script><!--<!-- Waah! -- -->\";\nvar handler = new htmlparser.DomHandler(function (error, dom) {\n if (error)\n \t[...do something for errors...]\n else\n \t[...parsing done, do something...]\n console.log(dom);\n});\nvar parser = new htmlparser.Parser(handler);\nparser.write(rawHtml);\nparser.done();\n```\n\nOutput:\n\n```javascript\n[{\n data: 'Xyz ',\n type: 'text'\n}, {\n type: 'script',\n name: 'script',\n attribs: {\n \tlanguage: 'javascript'\n },\n children: [{\n \tdata: 'var foo = \\'<bar>\\';<',\n \ttype: 'text'\n }]\n}, {\n data: '<!-- Waah! -- ',\n type: 'comment'\n}]\n```\n\n##Option: ignoreWhitespace\nIndicates whether the DOM should exclude text nodes that consists solely of whitespace. The default value is \"false\". \n\nThe following HTML will be used:\n\n```html\n<font>\n\t<br>this is the text\n<font>\n```\n\n###Example: true\n\n```javascript\n[{\n type: 'tag',\n name: 'font',\n children: [{\n \ttype: 'tag',\n \tname: 'br'\n }, {\n \tdata: 'this is the text\\n',\n \ttype: 'text'\n }, {\n \ttype: 'tag',\n \tname: 'font'\n }]\n}]\n```\n\n###Example: false\n\n```javascript\n[{\n\ttype: 'tag',\n name: 'font',\n children: [{\n \tdata: '\\n\\t',\n \ttype: 'text'\n }, {\n \ttype: 'tag',\n \tname: 'br'\n }, {\n \tdata: 'this is the text\\n',\n \ttype: 'text'\n }, {\n \ttype: 'tag',\n \tname: 'font'\n }]\n}]\n```",
"readmeFilename": "readme.md",
"_id": "domhandler@2.0.2",
"dist": {
"shasum": "ca597f42c440173c64d80d838cca738164f1b5ac"
},
"_from": "domhandler@2.0",
"_resolved": "https://registry.npmjs.org/domhandler/-/domhandler-2.0.2.tgz"
}

View File

@@ -0,0 +1,99 @@
#DOMHandler [![Build Status](https://secure.travis-ci.org/fb55/DomHandler.png)](http://travis-ci.org/fb55/DomHandler)
The DOM handler (formerly known as DefaultHandler) creates a tree containing all nodes of a page. The tree may be manipulated using the DOMUtils library.
##Usage
```javascript
var handler = new DomHandler([ <func> callback(err, dom), ] [ <obj> options ]);
// var parser = new Parser(handler[, options]);
```
##Example
```javascript
var htmlparser = require("htmlparser2");
var rawHtml = "Xyz <script language= javascript>var foo = '<<bar>>';< / script><!--<!-- Waah! -- -->";
var handler = new htmlparser.DomHandler(function (error, dom) {
if (error)
[...do something for errors...]
else
[...parsing done, do something...]
console.log(dom);
});
var parser = new htmlparser.Parser(handler);
parser.write(rawHtml);
parser.done();
```
Output:
```javascript
[{
data: 'Xyz ',
type: 'text'
}, {
type: 'script',
name: 'script',
attribs: {
language: 'javascript'
},
children: [{
data: 'var foo = \'<bar>\';<',
type: 'text'
}]
}, {
data: '<!-- Waah! -- ',
type: 'comment'
}]
```
##Option: ignoreWhitespace
Indicates whether the DOM should exclude text nodes that consist solely of whitespace. The default value is "false".
The following HTML will be used:
```html
<font>
<br>this is the text
<font>
```
###Example: true
```javascript
[{
type: 'tag',
name: 'font',
children: [{
type: 'tag',
name: 'br'
}, {
data: 'this is the text\n',
type: 'text'
}, {
type: 'tag',
name: 'font'
}]
}]
```
###Example: false
```javascript
[{
type: 'tag',
name: 'font',
children: [{
data: '\n\t',
type: 'text'
}, {
type: 'tag',
name: 'br'
}, {
data: 'this is the text\n',
type: 'text'
}, {
type: 'tag',
name: 'font'
}]
}]
```

View File

@@ -0,0 +1,51 @@
var fs = require("fs"),
path = require("path"),
assert = require("assert"),
Parser = require("htmlparser2").Parser,
Handler = require("./");
var basePath = path.resolve(__dirname, "tests"),
chunkSize = 5;
//Test driver: loads every .json file in the tests directory and feeds its
//"html" through the parser twice, comparing each resulting dom with the
//file's "expected" tree.
fs
.readdirSync(basePath)
//the regular expression is passed as `this` for RegExp.prototype.test
.filter(RegExp.prototype.test, /\.json$/) //only allow .json files
.map(function(name){
return path.resolve(basePath, name);
})
.map(require)
.forEach(function(test){
console.log("Testing:", test.name);
var handler = new Handler(function(err, dom){
assert.ifError(err);
compare(test.expected, dom);
}, test.options.handler);
var data = test.html;
var parser = new Parser(handler, test.options.parser);
//first, try to run the test via chunks
for(var i = 0; i < data.length; i+=chunkSize){
parser.write(data.substring(i, i + chunkSize));
}
parser.done();
//then parse everything
parser.parseComplete(data);
});
console.log("\nAll tests passed!");
//Recursively asserts that `result` contains everything found in `expected`.
//Primitives and null must be strictly equal; for objects every enumerable
//property of `expected` must exist in `result` and compare equal in turn.
//Additional properties of `result` are ignored.
function compare(expected, result){
	assert.equal(typeof expected, typeof result, "types didn't match");
	var isTree = typeof expected === "object" && expected !== null;
	if(!isTree){
		assert.strictEqual(expected, result, "result doesn't equal expected");
		return;
	}
	for(var key in expected){
		assert.ok(key in result, "result didn't contain property " + key);
		compare(expected[key], result[key]);
	}
}

View File

@@ -0,0 +1,44 @@
{
"name": "Basic test",
"options": {
"handler": {},
"parser": {}
},
"html": "<!DOCTYPE html><html><title>The Title</title><body>Hello world</body></html>",
"expected": [
{
"name": "!DOCTYPE",
"data": "!DOCTYPE html",
"type": "directive"
},
{
"type": "tag",
"name": "html",
"attribs": {},
"children": [
{
"type": "tag",
"name": "title",
"attribs": {},
"children": [
{
"data": "The Title",
"type": "text"
}
]
},
{
"type": "tag",
"name": "body",
"attribs": {},
"children": [
{
"data": "Hello world",
"type": "text"
}
]
}
]
}
]
}

View File

@@ -0,0 +1,24 @@
{
"name": "Single Tag 1",
"options": {
"handler": {},
"parser": {}
},
"html": "<br>text</br>",
"expected": [
{
"type": "tag",
"name": "br",
"attribs": {}
},
{
"data": "text",
"type": "text"
},
{
"type": "tag",
"name": "br",
"attribs": {}
}
]
}

View File

@@ -0,0 +1,24 @@
{
"name": "Single Tag 2",
"options": {
"handler": {},
"parser": {}
},
"html": "<br>text<br>",
"expected": [
{
"type": "tag",
"name": "br",
"attribs": {}
},
{
"data": "text",
"type": "text"
},
{
"type": "tag",
"name": "br",
"attribs": {}
}
]
}

View File

@@ -0,0 +1,30 @@
{
"name": "Unescaped chars in script",
"options": {
"handler": {},
"parser": {}
},
"html": "<head><script language=\"Javascript\">var foo = \"<bar>\"; alert(2 > foo); var baz = 10 << 2; var zip = 10 >> 1; var yap = \"<<>>>><<\";</script></head>",
"expected": [
{
"type": "tag",
"name": "head",
"attribs": {},
"children": [
{
"type": "script",
"name": "script",
"attribs": {
"language": "Javascript"
},
"children": [
{
"data": "var foo = \"<bar>\"; alert(2 > foo); var baz = 10 << 2; var zip = 10 >> 1; var yap = \"<<>>>><<\";",
"type": "text"
}
]
}
]
}
]
}

View File

@@ -0,0 +1,21 @@
{
"name": "Special char in comment",
"options": {
"handler": {},
"parser": {}
},
"html": "<head><!-- commented out tags <title>Test</title>--></head>",
"expected": [
{
"type": "tag",
"name": "head",
"attribs": {},
"children": [
{
"data": " commented out tags <title>Test</title>",
"type": "comment"
}
]
}
]
}

View File

@@ -0,0 +1,21 @@
{
"name": "Script source in comment",
"options": {
"handler": {},
"parser": {}
},
"html": "<script><!--var foo = 1;--></script>",
"expected": [
{
"type": "script",
"name": "script",
"attribs": {},
"children": [
{
"data": "<!--var foo = 1;-->",
"type": "text"
}
]
}
]
}

View File

@@ -0,0 +1,23 @@
{
"name": "Unescaped chars in style",
"options": {
"handler": {},
"parser": {}
},
"html": "<style type=\"text/css\">\n body > p\n\t{ font-weight: bold; }</style>",
"expected": [
{
"type": "style",
"name": "style",
"attribs": {
"type": "text/css"
},
"children": [
{
"data": "\n body > p\n\t{ font-weight: bold; }",
"type": "text"
}
]
}
]
}

View File

@@ -0,0 +1,23 @@
{
"name": "Extra spaces in tag",
"options": {
"handler": {},
"parser": {}
},
"html": "<\n font\t\n size='14' \n>the text<\n /\t\nfont\t \n>",
"expected": [
{
"type": "tag",
"name": "font",
"attribs": {
"size": "14"
},
"children": [
{
"data": "the text",
"type": "text"
}
]
}
]
}

View File

@@ -0,0 +1,23 @@
{
"name": "Unquoted attributes",
"options": {
"handler": {},
"parser": {}
},
"html": "<font size= 14>the text</font>",
"expected": [
{
"type": "tag",
"name": "font",
"attribs": {
"size": "14"
},
"children": [
{
"data": "the text",
"type": "text"
}
]
}
]
}

View File

@@ -0,0 +1,18 @@
{
"name": "Singular attribute",
"options": {
"handler": {},
"parser": {}
},
"html": "<option value='foo' selected>",
"expected": [
{
"type": "tag",
"name": "option",
"attribs": {
"value": "foo",
"selected": ""
}
}
]
}

View File

@@ -0,0 +1,23 @@
{
"name": "Text outside tags",
"options": {
"handler": {},
"parser": {}
},
"html": "Line one\n<br>\nline two",
"expected": [
{
"data": "Line one\n",
"type": "text"
},
{
"type": "tag",
"name": "br",
"attribs": {}
},
{
"data": "\nline two",
"type": "text"
}
]
}

View File

@@ -0,0 +1,14 @@
{
"name": "Only text",
"options": {
"handler": {},
"parser": {}
},
"html": "this is the text",
"expected": [
{
"data": "this is the text",
"type": "text"
}
]
}

View File

@@ -0,0 +1,22 @@
{
"name": "Comment within text",
"options": {
"handler": {},
"parser": {}
},
"html": "this is <!-- the comment --> the text",
"expected": [
{
"data": "this is ",
"type": "text"
},
{
"data": " the comment ",
"type": "comment"
},
{
"data": " the text",
"type": "text"
}
]
}

View File

@@ -0,0 +1,21 @@
{
"name": "Comment within text within script",
"options": {
"handler": {},
"parser": {}
},
"html": "<script>this is <!-- the comment --> the text</script>",
"expected": [
{
"type": "script",
"name": "script",
"attribs": {},
"children": [
{
"data": "this is <!-- the comment --> the text",
"type": "text"
}
]
}
]
}

View File

@@ -0,0 +1,25 @@
{
"name": "Option 'verbose' set to 'false'",
"options": {
"handler": {
"verbose": false
},
"parser": {}
},
"html": "<\n font\t\n size='14' \n>the text<\n /\t\nfont\t \n>",
"expected": [
{
"type": "tag",
"name": "font",
"attribs": {
"size": "14"
},
"children": [
{
"data": "the text",
"type": "text"
}
]
}
]
}

View File

@@ -0,0 +1,46 @@
{
"name": "Options 'ignoreWhitespace' set to 'true'",
"options": {
"handler": {
"ignoreWhitespace": true
},
"parser": {}
},
"html": "Line one\n<br> \t\n<br>\nline two<font>\n <br> x </font>",
"expected": [
{
"data": "Line one\n",
"type": "text"
},
{
"type": "tag",
"name": "br",
"attribs": {}
},
{
"type": "tag",
"name": "br",
"attribs": {}
},
{
"data": "\nline two",
"type": "text"
},
{
"type": "tag",
"name": "font",
"attribs": {},
"children": [
{
"type": "tag",
"name": "br",
"attribs": {}
},
{
"data": " x ",
"type": "text"
}
]
}
]
}

View File

@@ -0,0 +1,21 @@
{
"name": "XML Namespace",
"options": {
"handler": {},
"parser": {}
},
"html": "<ns:tag>text</ns:tag>",
"expected": [
{
"type": "tag",
"name": "ns:tag",
"attribs": {},
"children": [
{
"data": "text",
"type": "text"
}
]
}
]
}

View File

@@ -0,0 +1,19 @@
{
"name": "Enforce empty tags",
"options": {
"handler": {},
"parser": {}
},
"html": "<link>text</link>",
"expected": [
{
"type": "tag",
"name": "link",
"attribs": {}
},
{
"data": "text",
"type": "text"
}
]
}

View File

@@ -0,0 +1,23 @@
{
"name": "Ignore empty tags (xml mode)",
"options": {
"handler": {},
"parser": {
"xmlMode": true
}
},
"html": "<link>text</link>",
"expected": [
{
"type": "tag",
"name": "link",
"attribs": {},
"children": [
{
"data": "text",
"type": "text"
}
]
}
]
}

View File

@@ -0,0 +1,23 @@
{
"name": "Template script tags",
"options": {
"handler": {},
"parser": {}
},
"html": "<script type=\"text/template\"><h1>Heading1</h1></script>",
"expected": [
{
"type": "script",
"name": "script",
"attribs": {
"type": "text/template"
},
"children": [
{
"data": "<h1>Heading1</h1>",
"type": "text"
}
]
}
]
}

View File

@@ -0,0 +1,18 @@
{
"name": "Conditional comments",
"options": {
"handler": {},
"parser": {}
},
"html": "<!--[if lt IE 7]> <html class='no-js ie6 oldie' lang='en'> <![endif]--><!--[if lt IE 7]> <html class='no-js ie6 oldie' lang='en'> <![endif]-->",
"expected": [
{
"data": "[if lt IE 7]> <html class='no-js ie6 oldie' lang='en'> <![endif]",
"type": "comment"
},
{
"data": "[if lt IE 7]> <html class='no-js ie6 oldie' lang='en'> <![endif]",
"type": "comment"
}
]
}

View File

@@ -0,0 +1,46 @@
{
"name": "Basic test",
"options": {
"handler": {},
"parser": {
"lowerCaseTags": true
}
},
"html": "<!DOCTYPE html><HTML><TITLE>The Title</title><BODY>Hello world</body></html>",
"expected": [
{
"name": "!doctype",
"data": "!DOCTYPE html",
"type": "directive"
},
{
"type": "tag",
"name": "html",
"attribs": {},
"children": [
{
"type": "tag",
"name": "title",
"attribs": {},
"children": [
{
"data": "The Title",
"type": "text"
}
]
},
{
"type": "tag",
"name": "body",
"attribs": {},
"children": [
{
"data": "Hello world",
"type": "text"
}
]
}
]
}
]
}

View File

@@ -0,0 +1,224 @@
var ElementType = require("domelementtype"),
DomUtils = module.exports;
//Collects up to `limit` nodes of `arr` that pass `test`, in document order
//(a node comes before its descendants). Children are only searched when
//`recurse` is truthy.
function find(test, arr, recurse, limit){
	var matches = [];
	for(var idx = 0, total = arr.length; idx < total; idx++){
		var node = arr[idx];
		if(test(node)){
			matches.push(node);
			limit -= 1;
			if(limit <= 0) break;
		}
		var kids = node.children;
		if(!recurse || !kids || !(kids.length > 0)) continue;
		//the nested search gets whatever is left of the limit
		var nested = find(test, kids, recurse, limit);
		matches = matches.concat(nested);
		limit -= nested.length;
		if(limit <= 0) break;
	}
	return matches;
}
//Returns the first node, in document order, that passes `test`, or null.
//Children are only searched when `recurse` is truthy.
function findOne(test, arr, recurse){
	var idx = 0, total = arr.length;
	while(idx < total){
		var node = arr[idx++];
		if(test(node)) return node;
		if(!recurse) continue;
		var kids = node.children;
		if(kids && kids.length > 0){
			var hit = findOne(test, kids, true);
			if(hit) return hit;
		}
	}
	return null;
}
//Returns every node passing `test`: first the matches among `arr` itself,
//then, for each node of `arr` in turn, the matches found below it
function findAll(test, arr){
	var collected = arr.filter(test);
	arr.forEach(function(node){
		var kids = node.children;
		if(kids && kids.length > 0){
			collected = collected.concat(findAll(test, kids));
		}
	});
	return collected;
}
//True for nodes whose type is Tag, Script or Style
var isTag = DomUtils.isTag = function(elem){
	switch(elem.type){
		case ElementType.Tag:
		case ElementType.Script:
		case ElementType.Style:
			return true;
		default:
			return false;
	}
};
//Returns the nodes of `element` (a node or an array of nodes) that pass
//`test`. Descendants are searched unless `recurse` is exactly false. A
//numeric `limit` other than Infinity caps the number of results.
function filter(test, element, recurse, limit){
	var nodes = Array.isArray(element) ? element : [element];
	var deep = recurse !== false;
	if(typeof limit === "number" && limit !== Infinity){
		if(limit !== 1) return find(test, nodes, deep, limit);
		var first = findOne(test, nodes, deep);
		return first ? [first] : [];
	}
	return deep ? findAll(test, nodes) : nodes.filter(test);
}
DomUtils.filter = filter;
//Checks `element` against every own property of `options`; each value is a
//predicate function. The keys "tag_name", "tag_type" and "tag_contains" are
//applied to the element's name, type and data; any other key is applied to
//the attribute of that name. Returns true only if all checks pass.
DomUtils.testElement = function(options, element){
	for(var key in options){
		if(!options.hasOwnProperty(key)) continue;
		var passed;
		switch(key){
			case "tag_name":
				passed = isTag(element) && options.tag_name(element.name);
				break;
			case "tag_type":
				passed = options.tag_type(element.type);
				break;
			case "tag_contains":
				passed = !isTag(element) && options.tag_contains(element.data);
				break;
			default:
				passed = element.attribs && options[key](element.attribs[key]);
		}
		if(!passed) return false;
	}
	return true;
};
//Factories for the predicates behind the special option keys of
//getElements. Each one takes either a literal to compare against or a
//function to apply, and returns a function(elem) performing the check.
var Checks = {
	//matches tags by name; "*" matches every tag
	tag_name: function(name){
		if(typeof name === "function"){
			return function(elem){ return isTag(elem) && name(elem.name); };
		} else if(name === "*"){
			return isTag;
		} else {
			return function(elem){ return isTag(elem) && elem.name === name; };
		}
	},
	//matches nodes by their type
	tag_type: function(type){
		if(typeof type === "function"){
			return function(elem){ return type(elem.type); };
		} else {
			return function(elem){ return elem.type === type; };
		}
	},
	//matches non-tag nodes by their data
	tag_contains: function(data){
		//the argument is `data`; testing `typeof type` here meant that a
		//predicate function was never called but compared with elem.data
		if(typeof data === "function"){
			return function(elem){ return !isTag(elem) && data(elem.data); };
		} else {
			return function(elem){ return !isTag(elem) && elem.data === data; };
		}
	}
};
//Builds a predicate for the attribute `attrib`: `value` is either a function
//applied to the attribute's value or a literal it has to equal. The
//predicate yields a falsy result for nodes without attribs.
function getAttribCheck(attrib, value){
	var matches = typeof value === "function"
		? value
		: function(actual){ return actual === value; };
	return function(elem){
		return elem.attribs && matches(elem.attribs[attrib]);
	};
}
//Finds the nodes in `element` matching `options`. Every own key of `options`
//becomes one check (a special key from Checks, otherwise an attribute
//check); a node is returned when at least one of the checks passes.
//`recurse` and `limit` are handed on to filter.
DomUtils.getElements = function(options, element, recurse, limit){
	var checks = [];
	for(var key in options){
		if(!options.hasOwnProperty(key)) continue;
		checks.push(
			key in Checks ? Checks[key](options[key]) : getAttribCheck(key, options[key])
		);
	}
	switch(checks.length){
		case 0:
			return [];
		case 1:
			return filter(checks[0], element, recurse, limit);
	}
	var anyCheck = function(elem){
		return checks.some(function(check){ return check(elem); });
	};
	return filter(anyCheck, element, recurse, limit);
};
//Returns the first node whose "id" attribute matches `id`, or null.
//Descendants are searched unless `recurse` is exactly false.
DomUtils.getElementById = function(id, element, recurse){
	var roots = Array.isArray(element) ? element : [element];
	var deep = recurse !== false;
	return findOne(getAttribCheck("id", id), roots, deep);
};
//Returns the tags in `element` whose name matches `name`
//(a string, "*" for any tag, or a predicate function)
DomUtils.getElementsByTagName = function(name, element, recurse, limit){
	var nameCheck = Checks.tag_name(name);
	return filter(nameCheck, element, recurse, limit);
};
//Returns the nodes in `element` whose type matches `type`
//(a type string or a predicate function)
DomUtils.getElementsByTagType = function(type, element, recurse, limit){
	var typeCheck = Checks.tag_type(type);
	return filter(typeCheck, element, recurse, limit);
};
//Unlinks `elem` from its prev/next neighbours and removes it from its
//parent's children. The element's own prev/next/parent references are left
//untouched.
DomUtils.removeElement = function(elem){
	if(elem.prev) elem.prev.next = elem.next;
	if(elem.next) elem.next.prev = elem.prev;
	if(elem.parent){
		var siblings = elem.parent.children;
		var idx = siblings.lastIndexOf(elem);
		//lastIndexOf yields -1 when elem is not (or no longer) a child, e.g.
		//on a second removal; splice(-1, 1) would then delete the parent's
		//last child instead
		if(idx !== -1) siblings.splice(idx, 1);
	}
};
//Concatenates the outer HTML of all children of `elem`;
//returns "" for nodes without a children property
DomUtils.getInnerHTML = function(elem){
	var kids = elem.children;
	if(!kids) return "";
	var html = "";
	var idx = 0, total = kids.length;
	while(idx < total){
		html += DomUtils.getOuterHTML(kids[idx++]);
	}
	return html;
};
//boolean attributes without a value (taken from MatthewMueller/cheerio)
//getOuterHTML prints an attribute listed here by name only when its value is
//empty. `__proto__: null` gives the object no prototype, so the `in` lookup
//only finds the names listed below.
var booleanAttribs = {
__proto__: null,
async: true,
autofocus: true,
autoplay: true,
checked: true,
controls: true,
defer: true,
disabled: true,
hidden: true,
loop: true,
multiple: true,
open: true,
readonly: true,
required: true,
scoped: true,
selected: true,
"/": true //TODO when is this required?
};
//Serialises `elem` including its own markup. Text, comment, directive and
//CDATA nodes are rendered from their data; every other node is rendered as
//an opening tag with attributes, its inner HTML and a closing tag.
DomUtils.getOuterHTML = function(elem){
	var type = elem.type;
	if(type === ElementType.Text) return elem.data;
	if(type === ElementType.Comment) return "<!--" + elem.data + "-->";
	if(type === ElementType.Directive) return "<" + elem.data + ">";
	//a CDATA section opens with "<![CDATA[" and closes with "]]>"
	if(type === ElementType.CDATA) return "<![CDATA[" + elem.data + "]]>";
	var ret = "<" + elem.name;
	if("attribs" in elem){
		for(var attr in elem.attribs){
			if(elem.attribs.hasOwnProperty(attr)){
				ret += " " + attr;
				var value = elem.attribs[attr];
				if(!value){
					//empty value: boolean attributes are printed by name
					//only, all others get an explicit empty string
					if( !(attr in booleanAttribs) ){
						ret += '=""';
					}
				} else {
					ret += '="' + value + '"';
				}
			}
		}
	}
	return ret + ">" + DomUtils.getInnerHTML(elem) + "</" + elem.name + ">";
};

View File

@@ -0,0 +1,39 @@
{
"name": "domutils",
"version": "1.0.1",
"description": "utilities for working with htmlparser2's dom",
"main": "index.js",
"directories": {
"test": "tests"
},
"scripts": {
"test": "node tests/00-runtests.js"
},
"repository": {
"type": "git",
"url": "git://github.com/FB55/domutils.git"
},
"keywords": [
"dom",
"htmlparser2"
],
"dependencies": {
"domelementtype": "1"
},
"devDependencies": {
"htmlparser2": "2.3",
"domhandler": "2"
},
"author": {
"name": "Felix Boehm",
"email": "me@feedic.com"
},
"readme": "utilities for working with htmlparser2's dom\n",
"readmeFilename": "readme.md",
"_id": "domutils@1.0.1",
"dist": {
"shasum": "58b58d774774911556c16b8b02d99c609d987869"
},
"_from": "domutils@1.0",
"_resolved": "https://registry.npmjs.org/domutils/-/domutils-1.0.1.tgz"
}

View File

@@ -0,0 +1 @@
utilities for working with htmlparser2's dom

View File

@@ -0,0 +1,64 @@
var fs = require("fs"),
assert = require("assert");
var runCount = 0,
testCount = 0;
//Recursively checks that `result` contains everything found in `expected`
//and throws an Error on the first difference. Primitives and null must be
//strictly equal; for objects every enumerable property of `expected` must
//exist in `result`. Additional properties of `result` are ignored.
function compare(expected, result){
	if(typeof expected !== typeof result){
		throw Error("types didn't match");
	}
	var isTree = typeof expected === "object" && expected !== null;
	if(isTree){
		for(var key in expected){
			if(!(key in result)) throw Error("result didn't contain property " + key);
			compare(expected[key], result[key]);
		}
	} else if(expected !== result){
		throw Error("result doesn't equal expected");
	}
}
//Runs one test group. `test.dir` is the fixture directory (relative to this
//file) and `test.test(file, cb)` executes a single fixture. The callback is
//expected to be called twice per fixture; only the second call counts the
//fixture as finished.
function runTests(test){
//read files, load them, run them
fs.readdirSync(__dirname + test.dir
).map(function(file){
//skip dotfiles
if(file[0] === ".") return false;
if(file.substr(-5) === ".json") return JSON.parse(
fs.readFileSync(__dirname + test.dir + file)
);
return require(__dirname + test.dir + file);
}).forEach(function(file){
if(!file) return;
var second = false;
//runCount is the number of fixtures that are still running
runCount++;
console.log("Testing:", file.name);
test.test(file, function(err, dom){
assert.ifError(err);
compare(file.expected, dom);
if(second){
runCount--;
testCount++;
}
else second = true;
});
});
console.log("->", test.dir.slice(1, -1), "started");
}
//run all tests
[
"./02-dom_utils.js"
].map(require).forEach(runTests);
//log the results
//re-schedules itself until no fixture is running any more (runCount is 0)
(function check(){
if(runCount !== 0) return process.nextTick(check);
console.log("Total tests:", testCount);
}());

View File

@@ -0,0 +1,15 @@
//generate a dom
//Array(21).join(str) repeats the markup 20 times
var handler = new (require("domhandler"))();
(new (require("htmlparser2").Parser)(handler)).parseComplete(
Array(21).join("<?xml><tag1 id='asdf'> <script>text</script> <!-- comment --> <tag2> text </tag1>")
);
var dom = handler.dom;
//fixture directory, relative to the test runner
exports.dir = "/DomUtils/";
//Runs both lookup variants of a fixture against the shared dom and reports
//each result through `cb`
exports.test = function(test, cb){
cb(null, test.getElements(dom));
cb(null, test.getByFunction(dom));
};

View File

@@ -0,0 +1,56 @@
//Fixture: looking up the element with id "asdf", once through the generic
//getElements call (limited to one result) and once through getElementById.
//Both must yield the tree in `expected`.
var DomUtils = require("../..");
exports.name = "Get element by id";
exports.getElements = function(dom){
return DomUtils.getElements({id:"asdf"}, dom, true, 1)[0];
};
exports.getByFunction = function(dom){
return DomUtils.getElementById("asdf", dom, true);
};
exports.expected = {
"type": "tag",
"name": "tag1",
"attribs": {
"id": "asdf"
},
"children": [
{
"data": " ",
"type": "text"
},
{
"type": "script",
"name": "script",
"attribs": {},
"children": [
{
"data": "text",
"type": "text"
}
]
},
{
"data": " ",
"type": "text"
},
{
"data": " comment ",
"type": "comment"
},
{
"data": " ",
"type": "text"
},
{
"type": "tag",
"name": "tag2",
"attribs": {},
"children": [
{
"data": " text ",
"type": "text"
}
]
}
]
};

View File

@@ -0,0 +1,23 @@
//Fixture: looking up all "tag2" elements, once through the generic
//getElements call and once through getElementsByTagName. Both must yield
//the 20 elements collected in `expected`.
var DomUtils = require("../..");
exports.name = "Get elements by tagName";
exports.getElements = function(dom){
return DomUtils.getElements({tag_name:"tag2"}, dom, true);
};
exports.getByFunction = function(dom){
return DomUtils.getElementsByTagName("tag2", dom, true);
};
exports.expected = [];
//one expected entry per expected match
for(var i = 0; i < 20; i++) exports.expected.push(
{
"type": "tag",
"name": "tag2",
"attribs": {},
"children": [
{
"data": " text ",
"type": "text"
}
]
}
);

Some files were not shown because too many files have changed in this diff Show More