new changes

This commit is contained in:
Niranjan
2026-04-07 05:05:28 +05:30
parent 7c070224bd
commit a18bba15f2
29975 changed files with 3247495 additions and 2761 deletions

View File

@@ -0,0 +1,147 @@
import type { CheerioAPI } from '../load.js';
import { load } from '../load-parse.js';
/**
 * A Cheerio instance with no content, created by loading an empty node list.
 * Exported so spec files can share one document-less instance.
 */
export const cheerio: CheerioAPI = load([]);
/** Markup for the `#fruits` list: apple, orange and pear items. */
export const fruits: string =
  '<ul id="fruits">' +
  '<li class="apple">Apple</li>' +
  '<li class="orange">Orange</li>' +
  '<li class="pear">Pear</li>' +
  '</ul>';

/** Markup for the `#vegetables` list: carrot and sweetcorn items. */
export const vegetables: string =
  '<ul id="vegetables">' +
  '<li class="carrot">Carrot</li>' +
  '<li class="sweetcorn">Sweetcorn</li>' +
  '</ul>';

/**
 * Two `.container` divs holding two `.inner` divs each, followed by a
 * `#new` div whose markup is deliberately left exactly as written.
 */
export const divcontainers: string =
  '<div class="container">' +
  '<div class="inner">First</div>' +
  '<div class="inner">Second</div>' +
  '</div>' +
  '<div class="container">' +
  '<div class="inner">Third</div>' +
  '<div class="inner">Fourth</div>' +
  '</div>' +
  '<div id="new"><div>' +
  '<div>\n\n<p><em><b></b></em></p>\n\n</div>' +
  '</div>';

/** Markup for the `#chocolates` list; some items carry `data-*` attributes. */
export const chocolates: string =
  '<ul id="chocolates">' +
  '<li class="linth" data-highlight="Lindor" data-origin="swiss">Linth</li>' +
  '<li class="frey" data-taste="sweet" data-best-collection="Mahony">Frey</li>' +
  '<li class="cailler">Cailler</li>' +
  '</ul>';

/** Markup for the five-item `#drinks` list. */
export const drinks: string =
  '<ul id="drinks">' +
  '<li class="beer">Beer</li>' +
  '<li class="juice">Juice</li>' +
  '<li class="milk">Milk</li>' +
  '<li class="water">Water</li>' +
  '<li class="cider">Cider</li>' +
  '</ul>';

/** The `fruits` and `vegetables` lists nested directly inside `<ul id="food">`. */
export const food: string = '<ul id="food">' + fruits + vegetables + '</ul>';
/**
 * A full HTML document containing three `<ul>` lists with eleven `<li>` items
 * in total ("One" through "Eleven"). Some items carry the classes `red`,
 * `blue` and/or `sel`. The template literal is whitespace-sensitive, so its
 * content is kept exactly as written.
 */
export const eleven = `
<html>
<body>
<ul>
<li>One</li>
<li>Two</li>
<li class="blue sel">Three</li>
<li class="red">Four</li>
</ul>
<ul>
<li class="red">Five</li>
<li>Six</li>
<li class="blue">Seven</li>
</ul>
<ul>
<li>Eight</li>
<li class="red sel">Nine</li>
<li>Ten</li>
<li class="sel">Eleven</li>
</ul>
</body>
</html>
`;
/** Hidden `#unwrap` container with three child divs wrapping `.unwrap` spans. */
export const unwrapspans: string =
  '<div id=unwrap style="display: none;">' +
  '<div id=unwrap1><span class=unwrap>a</span><span class=unwrap>b</span></div>' +
  '<div id=unwrap2><span class=unwrap>c</span><span class=unwrap>d</span></div>' +
  '<div id=unwrap3><b><span class="unwrap unwrap3">e</span></b><b><span class="unwrap unwrap3">f</span></b></div>' +
  '</div>';

/** Assorted form controls: single/multiple selects, text, checkbox and radio inputs. */
export const inputs: string =
  '<select id="one"><option value="option_not_selected">Option not selected</option><option value="option_selected" selected>Option selected</option></select>' +
  '<select id="one-valueless"><option>Option not selected</option><option selected>Option selected</option></select>' +
  '<select id="one-html-entity"><option>Option not selected</option><option selected>Option &lt;selected&gt;</option></select>' +
  '<select id="one-nested"><option>Option not selected</option><option selected>Option <span>selected</span></option></select>' +
  '<input type="text" value="input_text" />' +
  '<input type="checkbox" name="checkbox_off" value="off" /><input type="checkbox" name="checkbox_on" value="on" checked />' +
  '<input type="checkbox" name="checkbox_valueless" />' +
  '<input type="radio" value="off" name="radio" /><input type="radio" name="radio" value="on" checked />' +
  '<input type="radio" value="off" name="radio[brackets]" /><input type="radio" name="radio[brackets]" value="on" checked />' +
  '<input type="radio" name="radio_valueless" />' +
  '<select id="multi" multiple><option value="1">1</option><option value="2" selected>2</option><option value="3" selected>3</option><option value="4">4</option></select>' +
  '<select id="multi-valueless" multiple><option>1</option><option selected>2</option><option selected>3</option><option>4</option></select>';

/** Two paragraphs mixing text, an inline element and an HTML comment. */
export const text: string =
  '<p>Apples, <b>oranges</b> and pears.</p>' +
  '<p>Carrots and <!-- sweetcorn --></p>';

/** One `<form>` per scenario, each identified by its `id`. */
export const forms: string =
  '<form id="simple"><input type="text" name="fruit" value="Apple" /></form>' +
  '<form id="nested"><div><input type="text" name="fruit" value="Apple" /></div><input type="text" name="vegetable" value="Carrot" /></form>' +
  '<form id="disabled"><input type="text" name="fruit" value="Apple" disabled /></form>' +
  '<form id="submit"><input type="text" name="fruit" value="Apple" /><input type="submit" name="submit" value="Submit" /></form>' +
  '<form id="select"><select name="fruit"><option value="Apple">Apple</option><option value="Orange" selected>Orange</option></select></form>' +
  '<form id="unnamed"><input type="text" name="fruit" value="Apple" /><input type="text" value="Carrot" /></form>' +
  '<form id="multiple"><select name="fruit" multiple><option value="Apple" selected>Apple</option><option value="Orange" selected>Orange</option><option value="Carrot">Carrot</option></select></form>' +
  '<form id="textarea"><textarea name="fruits">Apple\nOrange</textarea></form>' +
  '<form id="spaces"><input type="text" name="fruit" value="Blood orange" /></form>';

/** A `<noscript>` element followed by a paragraph; the markup starts with a closing body tag as written. */
export const noscript: string =
  '</body>' +
  '<noscript>' +
  '<!-- anchor linking to external file -->' +
  '<a href="https://github.com/cheeriojs/cheerio">External Link</a>' +
  '</noscript>' +
  '<p>Rocks!</p>' +
  '</body>';

/** A div containing a `<script>` element between two inline elements. */
export const script: string =
  '<div>' +
  '<a>A</a>' +
  '<script>' +
  ' var foo = "bar";' +
  '</script>' +
  '<b>B</b>' +
  '</div>';

/** Two elements separated by a bare text node. */
export const mixedText = '<a>1</a>TEXT<b>2</b>';

View File

@@ -0,0 +1,266 @@
/**
* This file includes tests for deprecated APIs. The methods are expected to be
* removed in the next major release of Cheerio, but their stability should be
* maintained until that time.
*/
import { describe, it, expect, beforeEach } from 'vitest';
import { cheerio, food, fruits } from '../__fixtures__/fixtures.js';
describe('deprecated APIs', () => {
  // Static helpers exposed on the Cheerio factory itself.
  describe('cheerio module', () => {
    describe('.parseHTML', () => {
      it('(html) : should preserve content', () => {
        const html = '<div>test div</div>';
        expect(cheerio(cheerio.parseHTML(html)[0]).html()).toBe('test div');
      });
    });

    describe('.merge', () => {
      it('should be a function', () => {
        expect(typeof cheerio.merge).toBe('function');
      });

      // #1674 - merge, wont accept Cheerio object
      it('should be able to merge array and cheerio object', () => {
        const ret = cheerio.merge<unknown>(cheerio(), ['elem1', 'elem2']);
        expect(typeof ret).toBe('object');
        expect(ret).toHaveLength(2);
      });

      it('(arraylike, arraylike) : should modify the first array, but not the second', () => {
        const arr1 = [1, 2, 3];
        const arr2 = [4, 5, 6];
        const ret = cheerio.merge(arr1, arr2);
        expect(typeof ret).toBe('object');
        expect(Array.isArray(ret)).toBe(true);
        // The first argument is mutated and returned; the second is untouched.
        expect(ret).toBe(arr1);
        expect(arr1).toHaveLength(6);
        expect(arr2).toHaveLength(3);
      });

      it('(arraylike, arraylike) : should handle objects that arent arrays, but are arraylike', () => {
        const arr1: ArrayLike<string> = {
          length: 3,
          0: 'a',
          1: 'b',
          2: 'c',
        };
        const arr2 = {
          length: 3,
          0: 'd',
          1: 'e',
          2: 'f',
        };
        cheerio.merge(arr1, arr2);
        expect(arr1).toHaveLength(6);
        expect(arr1[3]).toBe('d');
        expect(arr1[4]).toBe('e');
        expect(arr1[5]).toBe('f');
        expect(arr2).toHaveLength(3);
      });

      it('(?, ?) : should gracefully reject invalid inputs', () => {
        expect(cheerio.merge([4], 3 as never)).toBeUndefined();
        expect(cheerio.merge({} as never, {} as never)).toBeUndefined();
        expect(cheerio.merge([], {} as never)).toBeUndefined();
        expect(cheerio.merge({} as never, [])).toBeUndefined();
        // `length` is 3 but index 2 is missing, so this is not a valid array-like.
        const fakeArray = { length: 3, 0: 'a', 1: 'b', 3: 'd' };
        expect(cheerio.merge(fakeArray, [])).toBeUndefined();
        expect(cheerio.merge([], fakeArray)).toBeUndefined();
        expect(cheerio.merge({ length: '7' } as never, [])).toBeUndefined();
        expect(cheerio.merge({ length: -1 }, [])).toBeUndefined();
      });

      it('(?, ?) : should no-op on invalid inputs', () => {
        const fakeArray1 = { length: 3, 0: 'a', 1: 'b', 3: 'd' };
        cheerio.merge(fakeArray1, []);
        expect(fakeArray1).toHaveLength(3);
        expect(fakeArray1[0]).toBe('a');
        expect(fakeArray1[1]).toBe('b');
        expect(fakeArray1[3]).toBe('d');
        cheerio.merge([], fakeArray1);
        expect(fakeArray1).toHaveLength(3);
        expect(fakeArray1[0]).toBe('a');
        expect(fakeArray1[1]).toBe('b');
        expect(fakeArray1[3]).toBe('d');
      });
    });

    describe('.contains', () => {
      let $: typeof cheerio;

      beforeEach(() => {
        $ = cheerio.load(food);
      });

      it('(container, contained) : should correctly detect the provided element', () => {
        const $food = $('#food');
        const $fruits = $('#fruits');
        const $apple = $('.apple');
        expect(cheerio.contains($food[0], $fruits[0])).toBe(true);
        expect(cheerio.contains($food[0], $apple[0])).toBe(true);
      });

      it('(container, other) : should not detect elements that are not contained', () => {
        const $fruits = $('#fruits');
        const $vegetables = $('#vegetables');
        const $apple = $('.apple');
        expect(cheerio.contains($vegetables[0], $apple[0])).toBe(false);
        expect(cheerio.contains($fruits[0], $vegetables[0])).toBe(false);
        expect(cheerio.contains($vegetables[0], $fruits[0])).toBe(false);
        // An element is not considered to contain itself.
        expect(cheerio.contains($fruits[0], $fruits[0])).toBe(false);
        expect(cheerio.contains($vegetables[0], $vegetables[0])).toBe(false);
      });
    });

    describe('.root', () => {
      it('returns an empty selection', () => {
        const $empty = cheerio.root();
        // The selection holds exactly one node (the root), which has no children.
        expect($empty).toHaveLength(1);
        expect($empty[0].children).toHaveLength(0);
      });
    });
  });

  // Static helpers exposed on a "loaded" Cheerio function.
  describe('Cheerio function', () => {
    it('.load', () => {
      const $1 = cheerio.load(fruits);
      const $2 = $1.load('<div><p>Some <a>text</a>.</p></div>');
      expect($2('a')).toHaveLength(1);
    });

    /**
     * The `.html` static method defined on the "loaded" Cheerio factory
     * function is deprecated.
     *
     * In order to promote consistency with the jQuery library, users are
     * encouraged to instead use the instance method of the same name.
     *
     * @example
     *
     * ```js
     * const $ = cheerio.load('<h1>Hello, <span>world</span>.</h1>');
     *
     * $('h1').html();
     * //=> 'Hello, <span>world</span>.'
     * ```
     *
     * @example <caption>To render the markup of an entire document, invoke the
     * `html` function exported by the Cheerio module with a "root"
     * selection.</caption>
     *
     * ```js
     * cheerio.html($.root());
     * //=> '<html><head></head><body><h1>Hello, <span>world</span>.</h1></body></html>'
     * ```
     */
    describe('.html - deprecated API', () => {
      it('() : of empty cheerio object should return null', () => {
        /*
         * Note: the direct invocation of the Cheerio constructor function is
         * also deprecated.
         */
        const $ = cheerio();
        expect($.html()).toBe(null);
      });

      it('(selector) : should return the outerHTML of the selected element', () => {
        const $ = cheerio.load(fruits);
        expect($.html('.pear')).toBe('<li class="pear">Pear</li>');
      });
    });

    /**
     * The `.xml` static method defined on the "loaded" Cheerio factory function
     * is deprecated. Users are encouraged to instead use the `xml` function
     * exported by the Cheerio module.
     *
     * @example
     *
     * ```js
     * cheerio.xml($.root());
     * ```
     */
    describe('.xml - deprecated API', () => {
      it('() : renders XML', () => {
        const $ = cheerio.load('<foo></foo>', { xmlMode: true });
        expect($.xml()).toBe('<foo/>');
      });
    });

    /**
     * The `.text` static method defined on the "loaded" Cheerio factory
     * function is deprecated.
     *
     * In order to promote consistency with the jQuery library, users are
     * encouraged to instead use the instance method of the same name.
     *
     * @example
     *
     * ```js
     * const $ = cheerio.load('<h1>Hello, <span>world</span>.</h1>');
     * $('h1').text();
     * //=> 'Hello, world.'
     * ```
     *
     * @example <caption>To render the text content of an entire document,
     * invoke the `text` function exported by the Cheerio module with a "root"
     * selection.</caption>
     *
     * ```js
     * cheerio.text($.root());
     * //=> 'Hello, world.'
     * ```
     */
    describe('.text - deprecated API', () => {
      it('(cheerio object) : should return the text contents of the specified elements', () => {
        const $ = cheerio.load('<a>This is <em>content</em>.</a>');
        expect($.text($('a'))).toBe('This is content.');
      });

      it('(cheerio object) : should omit comment nodes', () => {
        const $ = cheerio.load(
          '<a>This is <!-- a comment --> not a comment.</a>',
        );
        expect($.text($('a'))).toBe('This is not a comment.');
      });

      it('(cheerio object) : should include text contents of children recursively', () => {
        const $ = cheerio.load(
          '<a>This is <div>a child with <span>another child and <!-- a comment --> not a comment</span> followed by <em>one last child</em> and some final</div> text.</a>',
        );
        expect($.text($('a'))).toBe(
          'This is a child with another child and not a comment followed by one last child and some final text.',
        );
      });

      it('() : should return the rendered text content of the root', () => {
        const $ = cheerio.load(
          '<a>This is <div>a child with <span>another child and <!-- a comment --> not a comment</span> followed by <em>one last child</em> and some final</div> text.</a>',
        );
        expect($.text()).toBe(
          'This is a child with another child and not a comment followed by one last child and some final text.',
        );
      });

      it('(cheerio object) : should not omit script tags', () => {
        const $ = cheerio.load('<script>console.log("test")</script>');
        expect($.text()).toBe('console.log("test")');
      });

      // The style element's text is part of the output, just like script text.
      it('(cheerio object) : should not omit style tags', () => {
        const $ = cheerio.load(
          '<style type="text/css">.cf-hidden { display: none; }</style>',
        );
        expect($.text()).toBe('.cf-hidden { display: none; }');
      });
    });
  });
});

View File

@@ -0,0 +1,66 @@
import { describe, it, expect } from 'vitest';
import { load } from '../index.js';
import type { CheerioOptions } from '../options.js';
/**
 * Load `str` and serialize the resulting document as XML.
 *
 * @param str - Markup to load.
 * @param options - Extra load options; `xml: true` is the default and can be
 *   overridden by this argument.
 * @returns The serialized XML.
 */
function xml(str: string, options?: CheerioOptions) {
  const xmlOptions: CheerioOptions = { xml: true, ...options };
  return load(str, xmlOptions).xml();
}
/**
 * Build `str` as a fragment inside an empty document and return the inner
 * HTML of the resulting selection.
 *
 * @param str - Markup for the new nodes.
 * @param options - Load options applied to the empty document.
 * @returns The inner HTML of the created selection.
 */
function dom(str: string, options?: CheerioOptions) {
  const emptyDocument = load('', options);
  const fragment = emptyDocument(str);
  return fragment.html();
}
describe('render', () => {
  describe('(xml)', () => {
    it('should render <media:thumbnail /> tags correctly', () => {
      const input =
        '<media:thumbnail url="http://www.foo.com/keyframe.jpg" width="75" height="50" time="12:05:01.123" />';
      const rendered = xml(input);

      expect(rendered).toBe(
        '<media:thumbnail url="http://www.foo.com/keyframe.jpg" width="75" height="50" time="12:05:01.123"/>',
      );
    });

    it('should render <link /> tags (RSS) correctly', () => {
      const input = '<link>http://www.github.com/</link>';
      const rendered = xml(input);

      expect(rendered).toBe('<link>http://www.github.com/</link>');
    });

    it('should escape entities', () => {
      // The output is expected to match the input exactly.
      const input = '<tag attr="foo &amp; bar"/>';

      expect(xml(input)).toBe(input);
    });

    it('should render HTML as XML', () => {
      const $ = load('<foo></foo>', null, false);
      const rendered = $.xml();

      expect(rendered).toBe('<foo/>');
    });
  });

  describe('(dom)', () => {
    // Shared input: a wrapper around an element with camelCase tag and attribute names.
    const camelCaseMarkup =
      '<g><someElem someAttribute="something">hello</someElem></g>';
    const lowerCasedOutput =
      '<someelem someattribute="something">hello</someelem>';

    it('should not keep camelCase for new nodes', () => {
      const rendered = dom(camelCaseMarkup, { xml: false });

      expect(rendered).toBe(lowerCasedOutput);
    });

    it('should keep camelCase for new nodes', () => {
      const rendered = dom(camelCaseMarkup, { xml: true });

      expect(rendered).toBe(
        '<someElem someAttribute="something">hello</someElem>',
      );
    });

    it('should maintain the parsing options of distinct contexts independently', () => {
      const $ = load('', { xml: false });
      const rendered = $(camelCaseMarkup).html();

      expect(rendered).toBe(lowerCasedOutput);
    });
  });
});

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,138 @@
import { describe, it, expect, beforeEach } from 'vitest';
import { load, type Cheerio } from '../index.js';
import type { Element } from 'domhandler';
import { cheerio, mixedText } from '../__fixtures__/fixtures.js';
describe('$(...)', () => {
  describe('.css', () => {
    it('(prop): should return a css property value', () => {
      const $item = cheerio('<li style="hai: there">');

      expect($item.css('hai')).toBe('there');
    });

    it('([prop1, prop2]): should return the specified property values as an object', () => {
      const $item = cheerio(
        '<li style="margin: 1px; padding: 2px; color: blue;">',
      );
      const picked = $item.css(['margin', 'color']);

      expect(picked).toStrictEqual({
        margin: '1px',
        color: 'blue',
      });
    });

    it('(prop, val): should set a css property', () => {
      const $items = cheerio('<li style="margin: 0;"></li><li></li>');

      $items.css('color', 'red');

      expect($items.attr('style')).toBe('margin: 0; color: red;');
      expect($items.eq(1).attr('style')).toBe('color: red;');
    });

    it('(prop, val) : should skip text nodes', () => {
      const $text = load(mixedText);
      // Select every child of <body>, including the bare text node.
      const $children = $text($text('body')[0].children);

      $children.css('test', 'value');

      expect($text('body').html()).toBe(
        '<a style="test: value;">1</a>TEXT<b style="test: value;">2</b>',
      );
    });

    it('(prop, ""): should unset a css property', () => {
      const $item = cheerio('<li style="padding: 1px; margin: 0;">');

      $item.css('padding', '');

      expect($item.attr('style')).toBe('margin: 0;');
    });

    it('(any, val): should ignore unsupported prop types', () => {
      const $item = cheerio('<li style="padding: 1px;">');

      $item.css(123 as never, 'test');

      expect($item.attr('style')).toBe('padding: 1px;');
    });

    it('(prop): should not mangle embedded urls', () => {
      const $item = cheerio(
        '<li style="background-image:url(http://example.com/img.png);">',
      );
      const background = $item.css('background-image');

      expect(background).toBe('url(http://example.com/img.png)');
    });

    it('(prop): should ignore blank properties', () => {
      const $item = cheerio('<li style=":#ccc;color:#aaa;">');

      expect($item.css()).toStrictEqual({ color: '#aaa' });
    });

    it('(prop): should ignore blank values', () => {
      const $item = cheerio('<li style="color:;position:absolute;">');

      expect($item.css()).toStrictEqual({ position: 'absolute' });
    });

    it('(prop): should return undefined for unmatched elements', () => {
      const $ = load('<li style="color:;position:absolute;">');
      const $missing = $('ul');

      expect($missing.css('background-image')).toBeUndefined();
    });

    it('(prop): should return undefined for unmatched styles', () => {
      const $item = cheerio('<li style="color:;position:absolute;">');

      expect($item.css('margin')).toBeUndefined();
    });

    describe('(prop, function):', () => {
      let $divs: Cheerio<Element>;

      beforeEach(() => {
        const $ = load(
          '<div style="margin: 0px;"></div><div style="margin: 1px;"></div><div style="margin: 2px;">',
        );
        $divs = $('div');
      });

      it('should iterate over the selection', () => {
        let calls = 0;

        // A `function` expression is required here so `this` can be asserted.
        $divs.css('margin', function (index, current) {
          expect(index).toBe(calls);
          expect(current).toBe(`${calls}px`);
          expect(this).toBe($divs[calls]);
          calls++;
          return undefined;
        });

        expect(calls).toBe(3);
      });

      it('should set each attribute independently', () => {
        // Per element: set a value, remove the property, leave it untouched.
        const replacements = ['4px', '', undefined];

        $divs.css('margin', (index) => replacements[index]);

        expect($divs.eq(0).attr('style')).toBe('margin: 4px;');
        expect($divs.eq(1).attr('style')).toBe('');
        expect($divs.eq(2).attr('style')).toBe('margin: 2px;');
      });
    });

    it('(obj): should set each key and val', () => {
      const $items = cheerio('<li style="padding: 0;"></li><li></li>');

      $items.css({ foo: 0 } as never);

      expect($items.eq(0).attr('style')).toBe('padding: 0; foo: 0;');
      expect($items.eq(1).attr('style')).toBe('foo: 0;');
    });

    describe('parser', () => {
      it('should allow any whitespace between declarations', () => {
        const $item = cheerio('<li style="one \t:\n 0;\n two \f\r:\v 1">');
        const picked = $item.css(['one', 'two', 'five']);

        expect(picked).toStrictEqual({
          one: '0',
          two: '1',
        });
      });

      it('should add malformed values to previous field (#1134)', () => {
        const $button = cheerio(
          '<button style="background-image: url(data:image/png;base64,iVBORw0KGgo)"></button>',
        );
        const background = $button.css('background-image');

        expect(background).toStrictEqual(
          'url(data:image/png;base64,iVBORw0KGgo)',
        );
      });
    });
  });
});

View File

@@ -0,0 +1,224 @@
import { domEach } from '../utils.js';
import { isTag, type Element, type AnyNode } from 'domhandler';
import type { Cheerio } from '../cheerio.js';
/**
 * Read style properties from the first matched element.
 *
 * @category CSS
 * @param names - Optional list of property names to pick; when omitted, all
 *   parsed properties are returned.
 * @returns A map of property names to values, or `undefined` when the
 *   selection is empty.
 * @see {@link https://api.jquery.com/css/}
 */
export function css<T extends AnyNode>(
  this: Cheerio<T>,
  names?: string[],
): Record<string, string> | undefined;
/**
 * Read a single style property from the first matched element.
 *
 * @category CSS
 * @param name - The property to read.
 * @returns The value of that property, if any.
 * @see {@link https://api.jquery.com/css/}
 */
export function css<T extends AnyNode>(
  this: Cheerio<T>,
  name: string,
): string | undefined;
/**
 * Write one style property on every matched element.
 *
 * @category CSS
 * @param prop - The property to write.
 * @param val - The new value, or a function producing it per element.
 * @returns The instance itself.
 * @see {@link https://api.jquery.com/css/}
 */
export function css<T extends AnyNode>(
  this: Cheerio<T>,
  prop: string,
  val:
    | string
    | ((this: Element, i: number, style: string) => string | undefined),
): Cheerio<T>;
/**
 * Write several style properties on every matched element.
 *
 * @category CSS
 * @param map - Property names mapped to their new values.
 * @returns The instance itself.
 * @see {@link https://api.jquery.com/css/}
 */
export function css<T extends AnyNode>(
  this: Cheerio<T>,
  map: Record<string, string>,
): Cheerio<T>;
/**
 * Implementation behind the overloads above: dispatches to the setter when a
 * value or a plain-object map is supplied, and to the getter otherwise.
 *
 * @category CSS
 * @param prop - A property name, a list of names, or a map of names to values.
 * @param val - The new value (or value-producing function) when setting.
 * @returns The instance itself when setting; the requested style(s) when
 *   getting.
 * @see {@link https://api.jquery.com/css/}
 */
export function css<T extends AnyNode>(
  this: Cheerio<T>,
  prop?: string | string[] | Record<string, string>,
  val?:
    | string
    | ((this: Element, i: number, style: string) => string | undefined),
): Cheerio<T> | Record<string, string> | string | undefined {
  // A non-array object is treated as a map of properties to set.
  const isPlainObject = typeof prop === 'object' && !Array.isArray(prop);
  const isSetter = (prop != null && val != null) || isPlainObject;

  if (!isSetter) {
    return this.length === 0 ? undefined : getCss(this[0], prop as string);
  }

  return domEach(this, (node, index) => {
    // Only tags carry a `style` attribute; other node types are skipped.
    if (!isTag(node)) return;
    // `prop` can't be an array here anymore.
    setCss(node, prop as string, val, index);
  });
}
/**
 * Apply a style change to one element and write the result back to its
 * `style` attribute.
 *
 * @private
 * @param el - Element whose style is updated.
 * @param prop - A property name, or a map of property names to values.
 * @param value - The new value, or a function returning it. An empty string
 *   removes the property; `null`/`undefined` leaves it unchanged.
 * @param idx - Index handed to `value` when it is a function.
 */
function setCss(
  el: Element,
  prop: string | Record<string, string>,
  value:
    | string
    | ((this: Element, i: number, style: string) => string | undefined)
    | undefined,
  idx: number,
) {
  if (typeof prop === 'object') {
    // Map form: apply each entry in turn, passing the entry's position as index.
    Object.keys(prop).forEach((name, position) => {
      setCss(el, name, prop[name], position);
    });
    return;
  }

  // Anything that is neither a string nor an object is ignored.
  if (typeof prop !== 'string') return;

  const current = getCss(el);
  const next =
    typeof value === 'function' ? value.call(el, idx, current[prop]) : value;

  if (next === '') {
    delete current[prop];
  } else if (next != null) {
    current[prop] = next;
  }

  // The attribute is rewritten even when nothing changed.
  el.attribs['style'] = stringify(current);
}
/**
 * Get the parsed styles of an element.
 *
 * @private
 * @category CSS
 * @param el - Element to get styles from.
 * @param props - Optionally the names of the properties of interest.
 * @returns The parsed styles, restricted to `props` when given.
 */
function getCss(el: AnyNode, props?: string[]): Record<string, string>;
/**
 * Get a single property from the parsed styles of an element.
 *
 * @private
 * @category CSS
 * @param el - Element to get styles from.
 * @param prop - Name of the prop.
 * @returns The value of the property, or `undefined` when it is not declared.
 */
function getCss(el: AnyNode, prop: string): string | undefined;
function getCss(
  el: AnyNode,
  prop?: string | string[],
): Record<string, string> | string | undefined {
  // Non-tag nodes (and missing nodes) have no `style` attribute to read.
  if (!el || !isTag(el)) return;

  const styles = parse(el.attribs['style']);

  /*
   * `styles` is a plain object, so a bare `styles[name]` lookup also finds
   * inherited members such as `constructor` or `toString`. Only properties
   * that were actually declared in the style attribute may be reported.
   */
  const isDeclared = (name: string): boolean =>
    Object.prototype.hasOwnProperty.call(styles, name);

  if (typeof prop === 'string') {
    return isDeclared(prop) ? styles[prop] : undefined;
  }

  if (Array.isArray(prop)) {
    const newStyles: Record<string, string> = {};
    for (const item of prop) {
      if (isDeclared(item) && styles[item] != null) {
        newStyles[item] = styles[item];
      }
    }
    return newStyles;
  }

  return styles;
}
/**
 * Serialize a style map into the text of a `style` attribute.
 *
 * Each entry becomes `name: value;`, and entries are separated by a single
 * space. An empty map yields an empty string.
 *
 * @private
 * @category CSS
 * @param obj - Style map to serialize.
 * @returns The serialized declarations.
 */
function stringify(obj: Record<string, string>): string {
  const declarations = Object.keys(obj).map((name) => `${name}: ${obj[name]};`);
  return declarations.join(' ');
}
/**
 * Turn the text of a `style` attribute into a map of declarations.
 *
 * The text is split on `;`. A piece counts as a declaration when it has a `:`
 * that is neither its first nor its last character. Any other non-blank piece
 * is glued back (with its `;`) onto the value of the most recent declaration,
 * or dropped when there is none yet.
 *
 * @private
 * @category CSS
 * @param styles - Raw style text; a falsy value is treated as empty.
 * @returns The parsed declarations.
 */
function parse(styles: string): Record<string, string> {
  const source = (styles || '').trim();
  const declarations: Record<string, string> = {};

  if (source === '') return declarations;

  let lastName: string | undefined;

  for (const piece of source.split(';')) {
    const colon = piece.indexOf(':');
    const isDeclaration = colon >= 1 && colon !== piece.length - 1;

    if (isDeclaration) {
      lastName = piece.slice(0, colon).trim();
      declarations[lastName] = piece.slice(colon + 1).trim();
      continue;
    }

    // Not a declaration: treat it as the continuation of the previous value.
    const remainder = piece.trimEnd();
    if (remainder.length > 0 && lastName !== undefined) {
      declarations[lastName] += `;${remainder}`;
    }
  }

  return declarations;
}

View File

@@ -0,0 +1,121 @@
import { describe, it, expect } from 'vitest';
import * as fixtures from '../__fixtures__/fixtures.js';
import { load } from '../load-parse.js';
/** Expected result shape when `red` and `sel` are each extracted as a single, possibly missing, value. */
interface RedSelObject {
red: string | undefined;
sel: string | undefined;
}
/** Expected result shape when `red` and `sel` are each extracted as a list of values. */
interface RedSelMultipleObject {
red: string[];
sel: string[];
}
/*
 * Exercises `.extract()` against the `eleven` fixture in one long scenario.
 * The `$ExpectType` comments assert the inferred type of the statement that
 * directly follows them, so they must stay immediately above that statement.
 */
describe('$.extract', () => {
it('() : should extract values for selectors', () => {
// `$` is used by the custom extraction callbacks further down; `$root` is
// the selection under test. The fixture is loaded separately for each.
const $ = load(fixtures.eleven);
const $root = load(fixtures.eleven).root();
// An empty object should lead to an empty extraction.
// $ExpectType ExtractedMap<{}>
const emptyExtract = $root.extract({});
expect(emptyExtract).toStrictEqual({});
// Non-existent values should be undefined.
// $ExpectType ExtractedMap<{ foo: string; }>
const simpleExtract = $root.extract({ foo: 'bar' });
expect(simpleExtract).toStrictEqual({ foo: undefined });
// Existing values should be extracted.
expect<{ red: string | undefined }>(
$root.extract({ red: '.red' }),
).toStrictEqual({
red: 'Four',
});
expect<RedSelObject>(
$root.extract({ red: '.red', sel: '.sel' }),
).toStrictEqual({
red: 'Four',
sel: 'Three',
});
// Descriptors for extractions should be supported
expect<RedSelObject>(
$root.extract({
red: { selector: '.red' },
sel: { selector: '.sel' },
}),
).toStrictEqual({ red: 'Four', sel: 'Three' });
// Should support extraction of multiple values.
// $ExpectType ExtractedMap<{ red: [string]; sel: [string]; }>
const multipleExtract = $root.extract({
red: ['.red'],
sel: ['.sel'],
});
expect<RedSelMultipleObject>(multipleExtract).toStrictEqual({
red: ['Four', 'Five', 'Nine'],
sel: ['Three', 'Nine', 'Eleven'],
});
// Should support custom `prop`s.
expect<RedSelObject>(
$root.extract({
red: { selector: '.red', value: 'outerHTML' },
sel: { selector: '.sel', value: 'tagName' },
}),
).toStrictEqual({ red: '<li class="red">Four</li>', sel: 'LI' });
// Should support custom `prop`s for multiple values.
expect<{ red: string[] }>(
$root.extract({
red: [{ selector: '.red', value: 'outerHTML' }],
}),
).toStrictEqual({
red: [
'<li class="red">Four</li>',
'<li class="red">Five</li>',
'<li class="red sel">Nine</li>',
],
});
// Should support custom extraction functions.
expect<{ red: string | undefined }>(
$root.extract({
red: {
selector: '.red',
value: (el, key) => `${key}=${$(el).text()}`,
},
}),
).toStrictEqual({ red: 'red=Four' });
// Should support custom extraction functions for multiple values.
expect<{ red: string[] }>(
$root.extract({
red: [
{
selector: '.red',
value: (el, key) => `${key}=${$(el).text()}`,
},
],
}),
).toStrictEqual({ red: ['red=Four', 'red=Five', 'red=Nine'] });
// Should support extraction objects
// $ExpectType ExtractedMap<{ section: { selector: string; value: { red: string; sel: string; }; }; }>
const subExtractObject = $root.extract({
section: {
selector: 'ul:nth(1)',
value: {
red: '.red',
sel: '.blue',
},
},
});
expect<{ section: RedSelObject | undefined }>(
subExtractObject,
).toStrictEqual({
section: {
red: 'Five',
sel: 'Seven',
},
});
});
});

View File

@@ -0,0 +1,92 @@
import type { AnyNode, Element } from 'domhandler';
import type { Cheerio } from '../cheerio.js';
import type { prop } from './attributes.js';
/**
 * Custom extraction callback. `extract` calls it with the matched element,
 * the key currently being filled, and the result object built so far; the
 * return value is stored under that key.
 */
type ExtractDescriptorFn = (
el: Element,
key: string,
// TODO: This could be typed with ExtractedMap
obj: Record<string, unknown>,
) => unknown;
/**
 * Describes how to extract one value: which element to select and what to
 * read from it. `value` may be a property name (read via `.prop()`), a
 * callback, or a nested map (extracted recursively); when omitted it
 * defaults to `'textContent'`.
 */
interface ExtractDescriptor {
selector: string;
value?: string | ExtractDescriptorFn | ExtractMap;
}
/**
 * One entry of an extraction map: a selector string, a descriptor, or a
 * one-element tuple of either. The tuple form requests all matches instead of
 * only the first.
 */
type ExtractValue = string | ExtractDescriptor | [string | ExtractDescriptor];
/** Maps result keys to the selector/descriptor used to populate them. */
export interface ExtractMap {
[key: string]: ExtractValue;
}
/**
 * Computes the result type for a single `ExtractValue`:
 * tuple entries become arrays of the non-nullable element result, strings
 * become `string | undefined`, and descriptors resolve according to the type
 * of their `value` (nested map, callback return type, or the return type of
 * `prop`).
 */
type ExtractedValue<V extends ExtractValue, M extends ExtractMap> = V extends [
string | ExtractDescriptor,
]
? NonNullable<ExtractedValue<V[0], M>>[]
: V extends string
? string | undefined
: V extends ExtractDescriptor
? V['value'] extends ExtractMap
? ExtractedMap<V['value']> | undefined
: V['value'] extends ExtractDescriptorFn
? ReturnType<V['value']> | undefined
: ReturnType<typeof prop> | undefined
: never;
/** The result type of `extract` for a given map: same keys, extracted value types. */
export type ExtractedMap<M extends ExtractMap> = {
[key in keyof M]: ExtractedValue<M[key], M>;
};
/**
 * Normalize an extraction entry into a descriptor with every field set.
 *
 * A bare string is treated as the selector. A missing (`null`/`undefined`)
 * `value` falls back to `'textContent'`. A new object is always returned.
 *
 * @param descr - Selector string or descriptor to normalize.
 * @returns A descriptor with both `selector` and `value` present.
 */
function getExtractDescr(
  descr: string | ExtractDescriptor,
): Required<ExtractDescriptor> {
  const normalized: ExtractDescriptor =
    typeof descr === 'string' ? { selector: descr } : descr;

  return {
    selector: normalized.selector,
    value: normalized.value ?? 'textContent',
  };
}
/**
 * Pull several values out of the current selection in one call and return
 * them as an object.
 *
 * For each key of `map`, the matching element(s) are located by selector and
 * read according to the entry's `value`: a callback is invoked, a string is
 * read as a property via `.prop()`, and a nested map is extracted
 * recursively. Entries wrapped in an array collect every match; other
 * entries use the first match only and yield `undefined` when nothing
 * matches.
 *
 * @param map - An object containing key-value pairs. The keys are the names of
 *   the properties to be created on the object, and the values are the
 *   selectors (or descriptors) used to extract the values.
 * @returns An object containing the extracted values.
 */
export function extract<M extends ExtractMap, T extends AnyNode>(
  this: Cheerio<T>,
  map: M,
): ExtractedMap<M> {
  const result: Record<string, unknown> = {};

  for (const key in map) {
    const entry = map[key];
    const collectAll = Array.isArray(entry);
    const { selector, value } = getExtractDescr(collectAll ? entry[0] : entry);

    // Pick the reader that matches the kind of `value`.
    let read: ExtractDescriptorFn;
    if (typeof value === 'function') {
      read = value;
    } else if (typeof value === 'string') {
      read = (el: Element) => this._make(el).prop(value);
    } else {
      read = (el: Element) => this._make(el).extract(value);
    }

    if (!collectAll) {
      const firstMatch = this._findBySelector(selector, 1);
      result[key] =
        firstMatch.length > 0 ? read(firstMatch[0], key, result) : undefined;
      continue;
    }

    const allMatches = this._findBySelector(
      selector,
      Number.POSITIVE_INFINITY,
    );
    result[key] = allMatches.map((_, el) => read(el, key, result)).get();
  }

  return result as ExtractedMap<M>;
}

View File

@@ -0,0 +1,155 @@
import { describe, it, expect, beforeEach } from 'vitest';
import { type CheerioAPI } from '../index.js';
import { cheerio, forms } from '../__fixtures__/fixtures.js';
describe('$(...)', () => {
  let $: CheerioAPI;

  // Entries that several expectations below have in common.
  const apple = { name: 'fruit', value: 'Apple' };
  const orange = { name: 'fruit', value: 'Orange' };
  const carrot = { name: 'vegetable', value: 'Carrot' };

  /** Orders serialized entries by value so assertions don't depend on document order. */
  const byValue = (a: { value: string }, b: { value: string }) =>
    a.value > b.value ? 1 : -1;

  beforeEach(() => {
    $ = cheerio.load(forms);
  });

  describe('.serializeArray', () => {
    it('() : should get form controls', () => {
      const entries = $('form#simple').serializeArray();

      expect(entries).toStrictEqual([apple]);
    });

    it('() : should get nested form controls', () => {
      expect($('form#nested').serializeArray()).toHaveLength(2);

      const entries = $('form#nested').serializeArray();
      entries.sort(byValue);

      expect(entries).toStrictEqual([apple, carrot]);
    });

    it('() : should not get disabled form controls', () => {
      const entries = $('form#disabled').serializeArray();

      expect(entries).toStrictEqual([]);
    });

    it('() : should not get form controls with the wrong type', () => {
      const entries = $('form#submit').serializeArray();

      expect(entries).toStrictEqual([apple]);
    });

    it('() : should get selected options', () => {
      const entries = $('form#select').serializeArray();

      expect(entries).toStrictEqual([orange]);
    });

    it('() : should not get unnamed form controls', () => {
      const entries = $('form#unnamed').serializeArray();

      expect(entries).toStrictEqual([apple]);
    });

    it('() : should get multiple selected options', () => {
      expect($('form#multiple').serializeArray()).toHaveLength(2);

      const entries = $('form#multiple').serializeArray();
      entries.sort(byValue);

      expect(entries).toStrictEqual([apple, orange]);
    });

    it('() : should get individually selected elements', () => {
      const entries = $('form#nested input').serializeArray();
      entries.sort(byValue);

      expect(entries).toStrictEqual([apple, carrot]);
    });

    it('() : should standardize line breaks', () => {
      const entries = $('form#textarea').serializeArray();

      expect(entries).toStrictEqual([
        { name: 'fruits', value: 'Apple\r\nOrange' },
      ]);
    });

    it("() : shouldn't serialize the empty string", () => {
      const unnamed = $('<input value=pineapple>').serializeArray();
      expect(unnamed).toStrictEqual([]);

      const emptyName = $('<input name="" value=pineapple>').serializeArray();
      expect(emptyName).toStrictEqual([]);

      const named = $('<input name="fruit" value=pineapple>').serializeArray();
      expect(named).toStrictEqual([{ name: 'fruit', value: 'pineapple' }]);
    });

    it('() : should serialize inputs without value attributes', () => {
      const entries = $('<input name="fruit">').serializeArray();

      expect(entries).toStrictEqual([{ name: 'fruit', value: '' }]);
    });
  });

  describe('.serialize', () => {
    it('() : should get form controls', () => {
      const query = $('form#simple').serialize();

      expect(query).toBe('fruit=Apple');
    });

    it('() : should get nested form controls', () => {
      const query = $('form#nested').serialize();

      expect(query).toBe('fruit=Apple&vegetable=Carrot');
    });

    it('() : should not get disabled form controls', () => {
      const query = $('form#disabled').serialize();

      expect(query).toBe('');
    });

    it('() : should get multiple selected options', () => {
      const query = $('form#multiple').serialize();

      expect(query).toBe('fruit=Apple&fruit=Orange');
    });

    it("() : should encode spaces as +'s", () => {
      const query = $('form#spaces').serialize();

      expect(query).toBe('fruit=Blood+orange');
    });
  });
});

View File

@@ -0,0 +1,103 @@
import { isTag, type AnyNode } from 'domhandler';
import type { Cheerio } from '../cheerio.js';
/*
 * Ported from jQuery:
 * https://github.com/jquery/jquery/blob/2.1.3/src/manipulation/var/rcheckableType.js
 * https://github.com/jquery/jquery/blob/2.1.3/src/serialize.js
 */
/** Selector matching the element types that are treated as form controls. */
const submittableSelector = 'input,select,textarea,keygen';
/** Matches every percent-encoded space in an encoded string. */
const r20 = /%20/g;
/** Matches a line break written either as `\n` or as `\r\n`. */
const rCRLF = /\r?\n/g;

/**
 * Turn the selection's form controls into a URL-encoded query string.
 *
 * The name/value pairs come from `serializeArray()`; each side is passed
 * through `encodeURIComponent`, pairs are joined with `&`, and encoded spaces
 * (`%20`) are rewritten as `+`.
 *
 * @category Forms
 * @example
 *
 * ```js
 * $('<form><input name="foo" value="bar" /></form>').serialize();
 * //=> 'foo=bar'
 * ```
 *
 * @returns The serialized form.
 * @see {@link https://api.jquery.com/serialize/}
 */
export function serialize<T extends AnyNode>(this: Cheerio<T>): string {
  const encodedPairs: string[] = [];

  for (const { name, value } of this.serializeArray()) {
    encodedPairs.push(
      encodeURIComponent(name) + '=' + encodeURIComponent(value),
    );
  }

  // Form encoding represents spaces as `+` rather than `%20`.
  return encodedPairs.join('&').replace(r20, '+');
}
/**
 * Collect the selection's form controls as an array of name/value pairs.
 *
 * A `<form>` in the selection contributes the controls found inside it; any
 * other element contributes itself if it is a control. A control whose value
 * is an array (e.g. `<select multiple>`) yields one pair per value.
 *
 * @category Forms
 * @example
 *
 * ```js
 * $('<form><input name="foo" value="bar" /></form>').serializeArray();
 * //=> [ { name: 'foo', value: 'bar' } ]
 * ```
 *
 * @returns The serialized form.
 * @see {@link https://api.jquery.com/serializeArray/}
 */
export function serializeArray<T extends AnyNode>(
  this: Cheerio<T>,
): {
  name: string;
  value: string;
}[] {
  // Line breaks (`\n` or `\r\n`, e.g. inside a `<textarea>`) are emitted as `\r\n`.
  const normalizeLineBreaks = (text: string): string =>
    text.replace(rCRLF, '\r\n');

  // Stage 1: resolve forms to the controls they contain; keep bare controls.
  const controls = this.map((_, node) => {
    const $node = this._make(node);
    const isForm = isTag(node) && node.name === 'form';
    return isForm
      ? $node.find(submittableSelector).toArray()
      : $node.filter(submittableSelector).toArray();
  });

  // Stage 2: keep only the controls that should be serialized.
  const serializable = controls.filter(
    // Must have a non-empty `name` and must not be disabled
    '[name!=""]:enabled' +
      // Must not be a submit/button/image/reset/file control
      ':not(:submit, :button, :image, :reset, :file)' +
      // Must be checked, or have no checkable state at all
      ':matches([checked], :not(:checkbox, :radio))',
  );

  // Stage 3: turn each remaining control into its name/value pair(s).
  return serializable
    .map<
      AnyNode,
      {
        name: string;
        value: string;
      }
    >((_, control) => {
      const $control = this._make(control);
      // Stage 2 guarantees the attribute is present.
      const name = $control.attr('name') as string;
      // A missing value (`undefined`/`null`) is serialized as the empty string.
      const rawValue = $control.val() ?? '';

      return Array.isArray(rawValue)
        ? rawValue.map((item) => ({
            name,
            value: normalizeLineBreaks(item),
          }))
        : { name, value: normalizeLineBreaks(rawValue) };
    })
    .toArray();
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,455 @@
import { describe, it, expect } from 'vitest';
import { parseDOM } from 'htmlparser2';
import { type Cheerio } from './index.js';
import { cheerio, fruits, food, noscript } from './__fixtures__/fixtures.js';
import type { Element } from 'domhandler';
/*
 * Type-level augmentation used only by this spec: declares the plugin methods
 * (`myPlugin`, `foo`) that the "prototype extensions" tests below attach to
 * `$.prototype` / `$.fn` at runtime, so those assignments and calls type-check.
 */
declare module './index.js' {
interface Cheerio<T> {
// Returns the receiver and the arguments it was called with.
myPlugin(...args: unknown[]): {
context: Cheerio<T>;
args: unknown[];
};
// No-op plugin used to check that extensions do not leak between loads.
foo(): void;
}
}
/**
 * Shared assertions for a selection that should contain exactly the
 * `.apple` item of the `fruits` fixture: its parent is the `<ul>`, it has no
 * previous sibling, its next sibling is `.orange`, and its only child is the
 * text "Apple".
 *
 * @param $apple - The selection (or array of elements) to verify.
 */
function testAppleSelect($apple: ArrayLike<Element>) {
  expect($apple).toHaveLength(1);

  const { parentNode, prev, next, childNodes } = $apple[0];

  expect(parentNode).toHaveProperty('tagName', 'ul');
  expect(prev).toBe(null);
  expect((next as Element).attribs).toHaveProperty('class', 'orange');
  expect(childNodes).toHaveLength(1);
  expect(childNodes[0]).toHaveProperty('data', 'Apple');
}
describe('cheerio', () => {
  it('cheerio(null) should be empty', () => {
    expect(cheerio(null as never)).toHaveLength(0);
  });

  it('cheerio(undefined) should be empty', () => {
    expect(cheerio(undefined)).toHaveLength(0);
  });

  it("cheerio('') should be empty", () => {
    expect(cheerio('')).toHaveLength(0);
  });

  it('cheerio(selector) with no context or root should be empty', () => {
    expect(cheerio('.h2')).toHaveLength(0);
    expect(cheerio('#fruits')).toHaveLength(0);
  });

  it('cheerio(node) : should override previously-loaded nodes', () => {
    const $ = cheerio.load('<div><span></span></div>');
    const spanNode = $('span')[0];
    const $span = $(spanNode);
    expect($span[0]).toBe(spanNode);
  });

  it('should be able to create html without a root or context', () => {
    const $h2 = cheerio('<h2>');
    expect($h2).not.toHaveLength(0);
    expect($h2).toHaveLength(1);
    expect($h2[0]).toHaveProperty('tagName', 'h2');
  });

  it('should be able to create complicated html', () => {
    const $script = cheerio(
      '<script src="script.js" type="text/javascript"></script>',
    ) as Cheerio<Element>;
    expect($script).not.toHaveLength(0);
    expect($script).toHaveLength(1);
    expect($script[0].attribs).toHaveProperty('src', 'script.js');
    expect($script[0].attribs).toHaveProperty('type', 'text/javascript');
    expect($script[0].childNodes).toHaveLength(0);
  });

  // The assertions live in `testAppleSelect`, which the lint rule cannot see.
  // eslint-disable-next-line vitest/expect-expect
  it('should be able to select .apple with only a context', () => {
    const $apple = cheerio('.apple', fruits);
    testAppleSelect($apple);
  });

  // eslint-disable-next-line vitest/expect-expect
  it('should be able to select .apple with a node as context', () => {
    const $apple = cheerio('.apple', cheerio(fruits)[0]);
    testAppleSelect($apple);
  });

  // eslint-disable-next-line vitest/expect-expect
  it('should be able to select .apple with only a root', () => {
    const $apple = cheerio('.apple', null, fruits);
    testAppleSelect($apple);
  });

  it('should be able to select an id', () => {
    const $fruits = cheerio('#fruits', null, fruits);
    expect($fruits).toHaveLength(1);
    expect($fruits[0].attribs).toHaveProperty('id', 'fruits');
  });

  it('should be able to select a tag', () => {
    const $ul = cheerio('ul', fruits);
    expect($ul).toHaveLength(1);
    expect($ul[0].tagName).toBe('ul');
  });

  it('should accept a node reference as a context', () => {
    const $elems = cheerio('<div><span></span></div>');
    expect(cheerio('span', $elems[0])).toHaveLength(1);
  });

  it('should accept an array of node references as a context', () => {
    const $elems = cheerio('<div><span></span></div>');
    expect(cheerio('span', $elems.toArray())).toHaveLength(1);
  });

  it('should select only elements inside given context (Issue #193)', () => {
    const $ = cheerio.load(food);
    const $fruits = $('#fruits');
    const fruitElements = $('li', $fruits);
    expect(fruitElements).toHaveLength(3);
  });

  it('should be able to select multiple tags', () => {
    const $fruits = cheerio('li', null, fruits);
    expect($fruits).toHaveLength(3);
    const classes = ['apple', 'orange', 'pear'];
    $fruits.each((idx, $fruit) => {
      expect($fruit.attribs).toHaveProperty('class', classes[idx]);
    });
  });

  // eslint-disable-next-line vitest/expect-expect
  it('should be able to do: cheerio("#fruits .apple")', () => {
    const $apple = cheerio('#fruits .apple', fruits);
    testAppleSelect($apple);
  });

  // eslint-disable-next-line vitest/expect-expect
  it('should be able to do: cheerio("li.apple")', () => {
    const $apple = cheerio('li.apple', fruits);
    testAppleSelect($apple);
  });

  // eslint-disable-next-line vitest/expect-expect
  it('should be able to select by attributes', () => {
    const $apple = cheerio('li[class=apple]', fruits);
    testAppleSelect($apple);
  });

  it('should be able to select multiple classes: cheerio(".btn.primary")', () => {
    const $a = cheerio(
      '.btn.primary',
      '<p><a class="btn primary" href="#">Save</a></p>',
    );
    expect($a).toHaveLength(1);
    expect($a[0].childNodes[0]).toHaveProperty('data', 'Save');
  });

  it('should not create a top-level node', () => {
    const $elem = cheerio('* div', '<div>');
    expect($elem).toHaveLength(0);
  });

  it('should be able to select multiple elements: cheerio(".apple, #fruits")', () => {
    const $elems = cheerio('.apple, #fruits', fruits);
    expect($elems).toHaveLength(2);
    const $apple = $elems
      .toArray()
      .filter((elem) => elem.attribs['class'] === 'apple');
    const $fruit = $elems
      .toArray()
      .find((elem) => elem.attribs['id'] === 'fruits');
    testAppleSelect($apple);
    expect($fruit?.attribs).toHaveProperty('id', 'fruits');
  });

  it('should select first element cheerio(:first)', () => {
    const $elem = cheerio('li:first', fruits);
    expect($elem.attr('class')).toBe('apple');
    const $filtered = cheerio('li', fruits).filter(':even');
    expect($filtered).toHaveLength(2);
  });

  it('should be able to select immediate children: cheerio("#fruits > .pear")', () => {
    const $food = cheerio(food);
    // Nest a second `.pear` inside the existing one so that only the child
    // combinator can tell the two apart.
    cheerio('.pear', $food).append('<li class="pear">Another Pear!</li>');
    expect(cheerio('#fruits .pear', $food)).toHaveLength(2);
    const $elem = cheerio('#fruits > .pear', $food);
    expect($elem).toHaveLength(1);
    expect($elem.attr('class')).toBe('pear');
  });

  // `+` is the adjacent-sibling combinator, not the child combinator.
  it('should be able to select adjacent siblings: cheerio(".apple + .pear")', () => {
    expect(cheerio('.apple + li', fruits)).toHaveLength(1);
    expect(cheerio('.apple + .pear', fruits)).toHaveLength(0);
    const $elem = cheerio('.apple + .orange', fruits);
    expect($elem).toHaveLength(1);
    expect($elem.attr('class')).toBe('orange');
  });

  // `~` is the general-sibling combinator, not the child combinator.
  it('should be able to select general siblings: cheerio(".apple ~ .pear")', () => {
    expect(cheerio('.apple ~ li', fruits)).toHaveLength(2);
    expect(cheerio('.apple ~ .pear', fruits).attr('class')).toBe('pear');
  });

  it('should handle wildcards on attributes: cheerio("li[class*=r]")', () => {
    const $elem = cheerio('li[class*=r]', fruits);
    expect($elem).toHaveLength(2);
    expect($elem.eq(0).attr('class')).toBe('orange');
    expect($elem.eq(1).attr('class')).toBe('pear');
  });

  it('should handle beginning of attr selectors: cheerio("li[class^=o]")', () => {
    const $elem = cheerio('li[class^=o]', fruits);
    expect($elem).toHaveLength(1);
    expect($elem.eq(0).attr('class')).toBe('orange');
  });

  // `$=` matches the end of the attribute value.
  it('should handle end of attr selectors: cheerio("li[class$=e]")', () => {
    const $elem = cheerio('li[class$=e]', fruits);
    expect($elem).toHaveLength(2);
    expect($elem.eq(0).attr('class')).toBe('apple');
    expect($elem.eq(1).attr('class')).toBe('orange');
  });

  it('(extended Array) should not interfere with prototype methods (issue #119)', () => {
    const extended: any = [];
    extended.find =
      extended.children =
      extended.each =
        function () {
          /* Ignore */
        };
    const $empty = cheerio(extended);
    expect($empty.find).toBe(cheerio.prototype.find);
    expect($empty.children).toBe(cheerio.prototype.children);
    expect($empty.each).toBe(cheerio.prototype.each);
  });

  it('cheerio.html(null) should return a "" string', () => {
    expect(cheerio.html(null as never)).toBe('');
  });

  it('should set html(number) as a string', () => {
    const $elem = cheerio('<div>');
    $elem.html(123 as never);
    expect(typeof $elem.text()).toBe('string');
  });

  it('should set text(number) as a string', () => {
    const $elem = cheerio('<div>');
    $elem.text(123 as never);
    expect(typeof $elem.text()).toBe('string');
  });

  describe('.load', () => {
    it('should generate selections as proper instances', () => {
      const $ = cheerio.load(fruits);
      expect($('.apple')).toBeInstanceOf($);
    });

    // Issue #1092
    it('should handle a character `)` in `:contains` selector', () => {
      const result = cheerio.load('<p>)aaa</p>')(
        String.raw`:contains('\)aaa')`,
      );
      expect(result).toHaveLength(3);
      expect(result.first().prop('tagName')).toBe('HTML');
      expect(result.eq(1).prop('tagName')).toBe('BODY');
      expect(result.last().prop('tagName')).toBe('P');
    });

    it('should be able to filter down using the context', () => {
      const $ = cheerio.load(fruits);
      const apple = $('.apple', 'ul');
      const lis = $('li', 'ul');
      expect(apple).toHaveLength(1);
      expect(lis).toHaveLength(3);
    });

    it('should preserve root content', () => {
      const $ = cheerio.load(fruits);
      // Root should not be overwritten
      const el = $('<div></div>');
      expect(Object.is(el, el._root)).toBe(false);
      // Query has to have results
      expect($('li', 'ul')).toHaveLength(3);
    });

    it('should allow loading a pre-parsed DOM', () => {
      const dom = parseDOM(food);
      const $ = cheerio.load(dom);
      expect($('ul')).toHaveLength(3);
    });

    it('should allow loading a single element', () => {
      const el = parseDOM(food)[0];
      const $ = cheerio.load(el);
      expect($('ul')).toHaveLength(3);
    });

    it('should render xml in html() when options.xml = true', () => {
      const str = '<MixedCaseTag UPPERCASEATTRIBUTE=""></MixedCaseTag>';
      const expected = '<MixedCaseTag UPPERCASEATTRIBUTE=""/>';
      const $ = cheerio.load(str, { xml: true });
      expect($('MixedCaseTag').get(0)).toHaveProperty(
        'tagName',
        'MixedCaseTag',
      );
      expect($.html()).toBe(expected);
    });

    it('should render xml in html() when options.xml = true passed to html()', () => {
      const str = '<MixedCaseTag UPPERCASEATTRIBUTE=""></MixedCaseTag>';
      // Parsing is done without the xml flag, so all tags are lowercased
      const expectedXml =
        '<html><head/><body><mixedcasetag uppercaseattribute=""/></body></html>';
      const expectedNoXml =
        '<html><head></head><body><mixedcasetag uppercaseattribute=""></mixedcasetag></body></html>';
      const $ = cheerio.load(str);
      expect($('MixedCaseTag').get(0)).toHaveProperty(
        'tagName',
        'mixedcasetag',
      );
      expect($.html()).toBe(expectedNoXml);
      expect($.html({ xml: true })).toBe(expectedXml);
    });

    it('should respect options on the element level', () => {
      const str =
        '<!doctype html><html><head><title>Some test</title></head><body><footer><p>Copyright &copy; 2003-2014</p></footer></body></html>';
      const expectedHtml = '<p>Copyright &copy; 2003-2014</p>';
      const expectedXml = '<p>Copyright © 2003-2014</p>';
      const domNotEncoded = cheerio.load(str, {
        xml: { decodeEntities: false },
      });
      const domEncoded = cheerio.load(str);
      expect(domNotEncoded('footer').html()).toBe(expectedHtml);
      expect(domEncoded('footer').html()).toBe(expectedXml);
    });

    it('should use htmlparser2 if xml option is used', () => {
      const str = '<div></div>';
      const dom = cheerio.load(str, null, false);
      expect(dom.html()).toBe(str);
    });

    it('should return a fully-qualified Function', () => {
      const $ = cheerio.load('<div>');
      expect($).toBeInstanceOf(Function);
    });

    describe('prototype extensions', () => {
      it('should honor extensions defined on `prototype` property', () => {
        const $ = cheerio.load('<div>');
        $.prototype.myPlugin = function (...args: unknown[]) {
          return {
            context: this,
            args,
          };
        };
        const $div = $('div');
        expect(typeof $div.myPlugin).toBe('function');
        expect($div.myPlugin().context).toBe($div);
        expect($div.myPlugin(1, 2, 3).args).toStrictEqual([1, 2, 3]);
      });

      it('should honor extensions defined on `fn` property', () => {
        const $ = cheerio.load('<div>');
        $.fn.myPlugin = function (...args: unknown[]) {
          return {
            context: this,
            args,
          };
        };
        const $div = $('div');
        expect(typeof $div.myPlugin).toBe('function');
        expect($div.myPlugin().context).toBe($div);
        expect($div.myPlugin(1, 2, 3).args).toStrictEqual([1, 2, 3]);
      });

      it('should isolate extensions between loaded functions', () => {
        const $a = cheerio.load('<div>');
        const $b = cheerio.load('<div>');
        $a.prototype.foo = function () {
          /* Ignore */
        };
        expect($b('div').foo).toBeUndefined();
      });
    });
  });

  describe('parse5 options', () => {
    // The content of <noscript> should be parsed only when the option is `false`
    it('{scriptingEnabled: ???}', () => {
      // [default] `scriptingEnabled: true` - tag contains one text element
      const withScripts = cheerio.load(noscript)('noscript');
      expect(withScripts).toHaveLength(1);
      expect(withScripts[0].children).toHaveLength(1);
      expect(withScripts[0].children[0].type).toBe('text');

      // `scriptingEnabled: false` - the content of noscript will be parsed
      const noScripts = cheerio.load(noscript, { scriptingEnabled: false })(
        'noscript',
      );
      expect(noScripts).toHaveLength(1);
      expect(noScripts[0].children).toHaveLength(2);
      expect(noScripts[0].children[0].type).toBe('comment');
      expect(noScripts[0].children[1].type).toBe('tag');
      expect(noScripts[0].children[1]).toHaveProperty('name', 'a');

      // `scriptingEnabled: ???` - any other falsy value should act as `true`
      for (const val of [undefined, null, 0, '']) {
        const options = { scriptingEnabled: val as never };
        const result = cheerio.load(noscript, options)('noscript');
        expect(result).toHaveLength(1);
        expect(result[0].children).toHaveLength(1);
        expect(result[0].children[0].type).toBe('text');
      }
    });

    // Location data should be present only when the option is truthy
    it('{sourceCodeLocationInfo: ???}', () => {
      // Location data should not be present
      for (const val of [undefined, null, 0, false, '']) {
        const options = { sourceCodeLocationInfo: val as never };
        const result = cheerio.load(noscript, options)('noscript');
        expect(result).toHaveLength(1);
        expect(result[0]).not.toHaveProperty('sourceCodeLocation');
      }

      // Location data should be present
      for (const val of [true, 1, 'test']) {
        const options = { sourceCodeLocationInfo: val as never };
        const result = cheerio.load(noscript, options)('noscript');
        expect(result).toHaveLength(1);
        expect(result[0]).toHaveProperty('sourceCodeLocation');
        expect(typeof result[0].sourceCodeLocation).toBe('object');
      }
    });
  });
});

View File

@@ -0,0 +1,143 @@
/* eslint-disable @typescript-eslint/no-unsafe-declaration-merging */
import type { InternalOptions } from './options.js';
import type { AnyNode, Document, ParentNode } from 'domhandler';
import type { BasicAcceptedElems } from './types.js';
import * as Attributes from './api/attributes.js';
import * as Traversing from './api/traversing.js';
import * as Manipulation from './api/manipulation.js';
import * as Css from './api/css.js';
import * as Forms from './api/forms.js';
import * as Extract from './api/extract.js';
/**
 * Intersection of everything exported by the API modules. The `Cheerio`
 * interface in this file extends it, and the same modules are copied onto
 * `Cheerio.prototype` with `Object.assign`, so the type mirrors the runtime
 * mix-in.
 */
type MethodsType = typeof Attributes &
typeof Traversing &
typeof Manipulation &
typeof Css &
typeof Forms &
typeof Extract;
/**
 * Central class of the library: an array-like wrapper around a set of
 * elements, exposing the API used to traverse, modify, and otherwise interact
 * with that set.
 *
 * Loading a document returns the Cheerio class bound to the document's root
 * element; instances are created when the document is queried (i.e. when
 * calling `$('selector')`).
 *
 * @example This is the HTML markup we will be using in all of the API examples:
 *
 * ```html
 * <ul id="fruits">
 *   <li class="apple">Apple</li>
 *   <li class="orange">Orange</li>
 *   <li class="pear">Pear</li>
 * </ul>
 * ```
 */
export abstract class Cheerio<T> implements ArrayLike<T> {
  /** Number of elements held by this instance. */
  length = 0;
  [index: number]: T;

  /** Options this instance was created with. */
  options: InternalOptions;

  /**
   * The root of the document. Can be set by using the `root` argument of the
   * constructor.
   *
   * @private
   */
  _root: Cheerio<Document> | null;

  /** Declared here but never assigned by this class. */
  prevObject: Cheerio<any> | undefined;

  /**
   * Instance of cheerio. Methods are specified in the modules. Usage of this
   * constructor is not recommended. Please use `$.load` instead.
   *
   * @private
   * @param elements - The new selection.
   * @param root - Sets the root node.
   * @param options - Options for the instance.
   */
  constructor(
    elements: ArrayLike<T> | undefined,
    root: Cheerio<Document> | null,
    options: InternalOptions,
  ) {
    this.options = options;
    this._root = root;

    // Nothing to copy: the instance stays empty with `length` 0.
    if (!elements) {
      return;
    }

    // Copy the elements onto the numeric indices of this instance.
    let position = 0;
    while (position < elements.length) {
      this[position] = elements[position];
      position += 1;
    }
    this.length = elements.length;
  }

  /**
   * Make a cheerio object.
   *
   * @private
   * @param dom - The contents of the new object.
   * @param context - The context of the new object.
   * @returns The new cheerio object.
   */
  abstract _make<T>(
    dom: ArrayLike<T> | T | string,
    context?: BasicAcceptedElems<AnyNode>,
  ): Cheerio<T>;

  /**
   * Parses some content.
   *
   * @private
   * @param content - Content to parse.
   * @param options - Options for parsing.
   * @param isDocument - Allows parser to be switched to fragment mode.
   * @param context - Parent node, or `null`; presumably the context element
   *   for fragment parsing (TODO confirm against the implementations).
   * @returns A document containing the `content`.
   */
  abstract _parse(
    content: string | Document | AnyNode | AnyNode[] | Buffer,
    options: InternalOptions,
    isDocument: boolean,
    context: ParentNode | null,
  ): Document;

  /**
   * Render an element or a set of elements.
   *
   * @private
   * @param dom - DOM to render.
   * @returns The rendered DOM.
   */
  abstract _render(dom: AnyNode | ArrayLike<AnyNode>): string;
}
/*
 * Declaration merged with the `Cheerio` class above: it types the members
 * that are attached to the prototype below (the API module methods, the
 * `cheerio` signature, `splice`, and the iterator) rather than declared in
 * the class body.
 */
export interface Cheerio<T> extends MethodsType, Iterable<T> {
cheerio: '[cheerio object]';
splice: typeof Array.prototype.splice;
}
/** Set a signature of the object. */
Cheerio.prototype.cheerio = '[cheerio object]';
/*
 * Make cheerio an array-like object by borrowing `Array.prototype.splice`.
 */
Cheerio.prototype.splice = Array.prototype.splice;
// Support for (const element of $(...)) iteration, by reusing the array iterator:
Cheerio.prototype[Symbol.iterator] = Array.prototype[Symbol.iterator];
// Plug in the API: copy every export of the API modules onto the prototype.
// This runs after the assignments above, and later arguments win on name clashes.
Object.assign(
Cheerio.prototype,
Attributes,
Traversing,
Manipulation,
Css,
Forms,
Extract,
);

View File

@@ -0,0 +1,10 @@
export type * from './types.js';
export type {
Cheerio,
CheerioAPI,
CheerioOptions,
HTMLParser2Options,
} from './slim.js';
export { contains, merge } from './static.js';
export * from './load-parse.js';

View File

@@ -0,0 +1,180 @@
import { describe, it, expect, afterEach } from 'vitest';
import * as cheerio from './index.js';
import { Writable } from 'node:stream';
import { createServer, type Server } from 'node:http';
/** Callback that deliberately does nothing; used where a result is ignored. */
function noop(): void {
  /* Intentionally empty */
}
/**
 * Creates a promise together with a Node-style callback that settles it:
 * a truthy `error` rejects the promise, anything else resolves it with `$`.
 */
function getPromise() {
  let settle!: (error: Error | null | undefined, $: cheerio.CheerioAPI) => void;

  const promise = new Promise<cheerio.CheerioAPI>((resolve, reject) => {
    settle = (error, $) => {
      if (error) {
        reject(error);
      } else {
        resolve($);
      }
    };
  });

  return { promise, cb: settle };
}
/** Markup used by every test in this file. */
const TEST_HTML = '<h1>Hello World</h1>';
/** The same markup encoded as UTF-16LE, without a byte-order mark. */
const TEST_HTML_UTF16 = Buffer.from(TEST_HTML, 'utf16le');
/** The UTF-16LE bytes prefixed with the UTF-16LE byte-order mark (FF FE). */
const TEST_HTML_UTF16_BOM = Buffer.concat([
  Buffer.from([0xff, 0xfe]),
  TEST_HTML_UTF16,
]);
describe('loadBuffer', () => {
  // Both encodings are expected to produce the same rendered document.
  const expectedDocument = `<html><head></head><body>${TEST_HTML}</body></html>`;

  it('should parse UTF-8 HTML', () => {
    const utf8Bytes = Buffer.from(TEST_HTML);
    const $ = cheerio.loadBuffer(utf8Bytes);
    expect($.html()).toBe(expectedDocument);
  });

  it('should parse UTF-16 HTML', () => {
    const $ = cheerio.loadBuffer(TEST_HTML_UTF16_BOM);
    expect($.html()).toBe(expectedDocument);
  });
});
describe('stringStream', () => {
  it('should use parse5 by default', async () => {
    const deferred = getPromise();
    const writable = cheerio.stringStream({}, deferred.cb);
    expect(writable).toBeInstanceOf(Writable);

    writable.end(TEST_HTML);

    const $ = await deferred.promise;
    expect($.html()).toBe(
      `<html><head></head><body>${TEST_HTML}</body></html>`,
    );
  });

  it('should error from parse5 on buffer', () => {
    const writable = cheerio.stringStream({}, noop);
    expect(writable).toBeInstanceOf(Writable);

    // Writing a Buffer instead of a string must throw synchronously.
    const writeBuffer = () => writable.write(Buffer.from(TEST_HTML));
    expect(writeBuffer).toThrow('Parser can work only with string streams.');
  });

  it('should use htmlparser2 for XML', async () => {
    const deferred = getPromise();
    const writable = cheerio.stringStream({ xmlMode: true }, deferred.cb);
    expect(writable).toBeInstanceOf(Writable);

    writable.end(TEST_HTML);

    const $ = await deferred.promise;
    expect($.html()).toBe(TEST_HTML);
  });
});
describe('decodeStream', () => {
  it('should use parse5 by default', async () => {
    const deferred = getPromise();
    const writable = cheerio.decodeStream({}, deferred.cb);
    expect(writable).toBeInstanceOf(Writable);

    // Feed raw UTF-16 bytes; the stream has to detect the encoding itself.
    writable.end(TEST_HTML_UTF16_BOM);

    const $ = await deferred.promise;
    expect($.html()).toBe(
      `<html><head></head><body>${TEST_HTML}</body></html>`,
    );
  });

  it('should use htmlparser2 for XML', async () => {
    const deferred = getPromise();
    const writable = cheerio.decodeStream({ xmlMode: true }, deferred.cb);
    expect(writable).toBeInstanceOf(Writable);

    writable.end(TEST_HTML_UTF16_BOM);

    const $ = await deferred.promise;
    expect($.html()).toBe(TEST_HTML);
  });
});
describe('fromURL', () => {
  // The server started by the current test, if any; closed in `afterEach`.
  let server: Server | undefined;

  /**
   * Starts a local HTTP server that answers every request with `body` and
   * the given `Content-Type`, and resolves with the port it listens on.
   */
  function createTestServer(
    contentType: string,
    body: string | Buffer,
  ): Promise<number> {
    return new Promise((resolve, reject) => {
      server = createServer((_req, res) => {
        res.writeHead(200, { 'Content-Type': contentType });
        res.end(body);
      });

      // Port 0 lets the operating system pick a free port.
      server.listen(0, () => {
        const address = server?.address();
        if (address != null && typeof address !== 'string') {
          resolve(address.port);
          return;
        }
        reject(new Error('Failed to get port'));
      });
    });
  }

  afterEach(
    async () =>
      new Promise<void>((resolve, reject) => {
        if (!server) {
          resolve();
          return;
        }
        server.close((err) => {
          if (err) {
            reject(err);
          } else {
            resolve();
          }
        });
        server = undefined;
      }),
  );

  it('should fetch UTF-8 HTML', async () => {
    const port = await createTestServer('text/html', TEST_HTML);
    const $ = await cheerio.fromURL(`http://localhost:${port}`);

    expect($.html()).toBe(
      `<html><head></head><body>${TEST_HTML}</body></html>`,
    );
  });

  it('should fetch UTF-16 HTML', async () => {
    // No BOM here: the charset is only announced in the Content-Type header.
    const port = await createTestServer(
      'text/html; charset=utf-16le',
      TEST_HTML_UTF16,
    );
    const $ = await cheerio.fromURL(`http://localhost:${port}`);

    expect($.html()).toBe(
      `<html><head></head><body>${TEST_HTML}</body></html>`,
    );
  });

  it('should parse XML based on Content-Type', async () => {
    const port = await createTestServer('text/xml', TEST_HTML);
    const $ = await cheerio.fromURL(`http://localhost:${port}`);

    expect($.html()).toBe(TEST_HTML);
  });
});

View File

@@ -0,0 +1,274 @@
/**
* @file Batteries-included version of Cheerio. This module includes several
* convenience methods for loading documents from various sources.
*/
export * from './load-parse.js';
export { contains, merge } from './static.js';
export type * from './types.js';
export type {
Cheerio,
CheerioAPI,
CheerioOptions,
HTMLParser2Options,
} from './slim.js';
import { adapter as htmlparser2Adapter } from 'parse5-htmlparser2-tree-adapter';
import * as htmlparser2 from 'htmlparser2';
import { ParserStream as Parse5Stream } from 'parse5-parser-stream';
import {
decodeBuffer,
DecodeStream,
type SnifferOptions,
} from 'encoding-sniffer';
import * as undici from 'undici';
import MIMEType from 'whatwg-mimetype';
import { Writable, finished } from 'node:stream';
import type { CheerioAPI } from './load.js';
import {
flattenOptions,
type InternalOptions,
type CheerioOptions,
} from './options.js';
import { load } from './load-parse.js';
/**
 * Sniffs the encoding of a buffer, then creates a querying function bound to a
 * document created from the buffer.
 *
 * When no `defaultEncoding` is supplied in `options.encoding`, the fallback is
 * `utf8` in XML mode and `windows-1252` otherwise.
 *
 * @category Loading
 * @example
 *
 * ```js
 * import * as cheerio from 'cheerio';
 * import * as fs from 'node:fs';
 *
 * const buffer = fs.readFileSync('index.html');
 * const $ = cheerio.loadBuffer(buffer);
 * ```
 *
 * @param buffer - The buffer to sniff the encoding of.
 * @param options - The options to pass to Cheerio. The `encoding` entry is
 *   handed to the encoding sniffer and is not forwarded to Cheerio.
 * @returns The loaded document.
 */
export function loadBuffer(
  buffer: Buffer,
  options: DecodeStreamOptions = {},
): CheerioAPI {
  // Keep the sniffer options out of the Cheerio options, as `decodeStream` does.
  const { encoding, ...cheerioOptions } = options;
  const opts = flattenOptions(cheerioOptions);

  const str = decodeBuffer(buffer, {
    ...encoding,
    // An explicitly supplied default wins; otherwise pick one by parser mode.
    defaultEncoding:
      encoding?.defaultEncoding ?? (opts?.xmlMode ? 'utf8' : 'windows-1252'),
  });

  return load(str, opts);
}
/**
 * Creates the writable that turns a sequence of strings into a loaded
 * document, picking the parser from the already-flattened options.
 *
 * With `_useHtmlParser2` set, chunks are fed to an htmlparser2 document
 * stream; otherwise a parse5 parser stream is returned, with the htmlparser2
 * tree adapter and scripting enabled unless the options say otherwise.
 *
 * @param options - Flattened Cheerio options; the parse5 defaults are written
 *   onto this object.
 * @param cb - Called with an error, or with the loaded document once the
 *   stream has finished.
 * @returns The writable stream that accepts string chunks.
 */
function _stringStream(
  options: InternalOptions | undefined,
  cb: (err: Error | null | undefined, $: CheerioAPI) => void,
): Writable {
  if (!options?._useHtmlParser2) {
    const parse5Options: InternalOptions = options ?? {};

    if (parse5Options.treeAdapter == null) {
      parse5Options.treeAdapter = htmlparser2Adapter;
    }
    // Anything other than an explicit `false` turns scripting on.
    if (parse5Options.scriptingEnabled !== false) {
      parse5Options.scriptingEnabled = true;
    }

    const parse5Stream = new Parse5Stream(parse5Options);
    finished(parse5Stream, (err) => cb(err, load(parse5Stream.document)));
    return parse5Stream;
  }

  const documentParser = htmlparser2.createDocumentStream(
    (err, document) => cb(err, load(document)),
    options,
  );

  return new Writable({
    // Hand string chunks to `write` as strings instead of Buffers.
    decodeStrings: false,
    write(chunk, _encoding, done) {
      if (typeof chunk === 'string') {
        documentParser.write(chunk);
        done();
        return;
      }
      throw new TypeError('Expected a string');
    },
    final(done) {
      documentParser.end();
      done();
    },
  });
}
/**
 * Creates a `Writable` stream that parses the strings written to it into a
 * document.
 *
 * Once the stream has finished, `cb` is invoked with the loaded document (or
 * with an error).
 *
 * @category Loading
 * @example
 *
 * ```js
 * import * as cheerio from 'cheerio';
 * import * as fs from 'fs';
 *
 * const writeStream = cheerio.stringStream({}, (err, $) => {
 *   if (err) {
 *     // Handle error
 *   }
 *
 *   console.log($('h1').text());
 *   // Output: Hello, world!
 * });
 *
 * fs.createReadStream('my-document.html', { encoding: 'utf8' }).pipe(
 *   writeStream,
 * );
 * ```
 *
 * @param options - The options to pass to Cheerio.
 * @param cb - The callback to call when the stream is finished.
 * @returns The writable stream.
 */
export function stringStream(
  options: CheerioOptions,
  cb: (err: Error | null | undefined, $: CheerioAPI) => void,
): Writable {
  const internalOptions = flattenOptions(options);
  return _stringStream(internalOptions, cb);
}
/** Cheerio options plus the options used for detecting the input's encoding. */
export interface DecodeStreamOptions extends CheerioOptions {
/** Options handed to `encoding-sniffer` (`decodeBuffer` / `DecodeStream`). */
encoding?: SnifferOptions;
}
/**
 * Parses a stream of buffers into a document.
 *
 * The stream is a `Writable` stream that accepts buffers. When the stream is
 * finished, the callback is called with the loaded document.
 *
 * When no `defaultEncoding` is supplied in `options.encoding`, the fallback is
 * `utf8` in XML mode and `windows-1252` otherwise. The caller's `encoding`
 * object is never modified.
 *
 * @category Loading
 * @param options - The options to pass to Cheerio.
 * @param cb - The callback to call when the stream is finished.
 * @returns The writable stream.
 */
export function decodeStream(
  options: DecodeStreamOptions,
  cb: (err: Error | null | undefined, $: CheerioAPI) => void,
): Writable {
  const { encoding, ...cheerioOptions } = options;
  const opts = flattenOptions(cheerioOptions);

  // Work on a copy: writing the default onto the caller's object would make
  // it stick when the same object is reused for a call in the other mode.
  const snifferOptions: SnifferOptions = {
    ...encoding,
    // Set the default encoding to UTF-8 for XML mode
    defaultEncoding:
      encoding?.defaultEncoding ?? (opts?.xmlMode ? 'utf8' : 'windows-1252'),
  };

  const decoder = new DecodeStream(snifferOptions);
  const loadStream = _stringStream(opts, cb);

  // Decoded strings flow into the parser; callers write bytes to the decoder.
  decoder.pipe(loadStream);
  return decoder;
}
/** The options argument (second parameter) of `undici.stream`. */
type UndiciStreamOptions = Parameters<typeof undici.stream>[1];
/** Options accepted by `fromURL`: decoding/Cheerio options plus request options. */
export interface CheerioRequestOptions extends DecodeStreamOptions {
/** The options passed to `undici`'s `stream` method. */
requestOptions?: UndiciStreamOptions;
}
/**
 * Request options used by `fromURL` when the caller supplies none. The
 * `headers` entry is also used when caller-supplied options have no headers.
 */
const defaultRequestOptions: UndiciStreamOptions = {
method: 'GET',
// Allow redirects by default
maxRedirections: 5,
// NOTE: `throwOnError` currently doesn't work https://github.com/nodejs/undici/issues/1753
throwOnError: true,
// Set an Accept header
headers: {
accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
},
};
/**
 * `fromURL` loads a document from a URL.
 *
 * When no `requestOptions` are given, the defaults issue a `GET` that follows
 * up to five redirects and sets `throwOnError` (see the note on
 * `defaultRequestOptions`: that flag may not take effect in `undici`). When
 * `requestOptions` are given they replace the defaults; only a missing
 * `headers` entry is filled in from the defaults.
 *
 * The response must have an HTML or XML content type; a missing
 * `Content-Type` header is treated as `text/html`. XML mode and the transport
 * charset are derived from that header. The `options` object and the objects
 * nested in it are not modified.
 *
 * @category Loading
 * @example
 *
 * ```js
 * import * as cheerio from 'cheerio';
 *
 * const $ = await cheerio.fromURL('https://example.com');
 * ```
 *
 * @param url - The URL to load the document from.
 * @param options - The options to pass to Cheerio.
 * @returns The loaded document.
 */
export async function fromURL(
  url: string | URL,
  options: CheerioRequestOptions = {},
): Promise<CheerioAPI> {
  const {
    requestOptions: callerRequestOptions = defaultRequestOptions,
    encoding: callerEncoding,
    ...cheerioOptions
  } = options;

  /*
   * Shallow copies: the header default and the charset label are written
   * below, and must not leak into the caller's objects (which may be reused
   * for other requests) or into the module-level defaults.
   */
  const requestOptions: UndiciStreamOptions = { ...callerRequestOptions };
  const encoding: SnifferOptions = { ...callerEncoding };

  let undiciStream: Promise<undici.Dispatcher.StreamData> | undefined;

  // Add headers if none were supplied.
  requestOptions.headers ??= defaultRequestOptions.headers;

  const promise = new Promise<CheerioAPI>((resolve, reject) => {
    undiciStream = undici.stream(url, requestOptions, (res) => {
      const contentType = res.headers['content-type'] ?? 'text/html';
      const mimeType = new MIMEType(
        Array.isArray(contentType) ? contentType[0] : contentType,
      );

      if (!mimeType.isHTML() && !mimeType.isXML()) {
        throw new RangeError(
          `The content-type "${contentType}" is neither HTML nor XML.`,
        );
      }

      // Forward the charset from the header to the decodeStream.
      encoding.transportLayerEncodingLabel = mimeType.parameters.get('charset');

      /*
       * If we allow redirects, we will have entries in the history.
       * The last entry will be the final URL.
       */
      const history = (
        res.context as
          | {
              history?: URL[];
            }
          | undefined
      )?.history;

      const opts = {
        encoding,
        // Set XML mode based on the MIME type.
        xmlMode: mimeType.isXML(),
        // Set the `baseURL` to the final URL.
        baseURL: history ? history[history.length - 1] : url,
        // Caller-supplied Cheerio options override the derived values above.
        ...cheerioOptions,
      };

      return decodeStream(opts, (err, $) => (err ? reject(err) : resolve($)));
    });
  });

  // Let's make sure the request is completed before returning the promise.
  await undiciStream;

  return promise;
}

View File

@@ -0,0 +1,39 @@
import { type CheerioAPI, getLoad } from './load.js';
import { getParse } from './parse.js';
import { renderWithParse5, parseWithParse5 } from './parsers/parse5-adapter.js';
import type { CheerioOptions } from './options.js';
import renderWithHtmlparser2 from 'dom-serializer';
import { parseDocument as parseWithHtmlparser2 } from 'htmlparser2';
import type { AnyNode } from 'domhandler';
/**
 * Parser used by this entry point: htmlparser2 when the internal
 * `_useHtmlParser2` flag is set, parse5 otherwise.
 */
const parse = getParse((content, options, isDocument, context) => {
  if (options._useHtmlParser2) {
    return parseWithHtmlparser2(content, options);
  }
  return parseWithParse5(content, options, isDocument, context);
});
// Duplicate docs due to https://github.com/TypeStrong/typedoc/issues/1616
/**
 * Build a querying function that is bound to a document parsed from the
 * given markup.
 *
 * As in a web browser, loading a document may add `<html>`, `<head>`, and
 * `<body>` elements; pass `false` as `isDocument` to parse in fragment mode
 * and avoid that.
 *
 * @category Loading
 * @param content - Markup to be loaded.
 * @param options - Options for the created instance.
 * @param isDocument - Allows parser to be switched to fragment mode.
 * @returns The loaded document.
 * @see {@link https://cheerio.js.org#loading} for additional usage information.
 */
export const load: (
  content: string | AnyNode | AnyNode[] | Buffer,
  options?: CheerioOptions | null,
  isDocument?: boolean,
) => CheerioAPI = getLoad(parse, (dom, options) => {
  // Serialize with the library that matches the parser in use.
  if (options._useHtmlParser2) {
    return renderWithHtmlparser2(dom, options);
  }
  return renderWithParse5(dom);
});

View File

@@ -0,0 +1,31 @@
import { describe, it, expect } from 'vitest';
import { load } from './index.js';
// Tests for the `load` function exported by the package entry point.
describe('.load', () => {
// Creating a detached node through `$` must leave the loaded document with a single <body>.
it('(html) : should retain original root after creating a new node', () => {
const $ = load('<body><ul id="fruits"></ul></body>');
expect($('body')).toHaveLength(1);
$('<script>');
expect($('body')).toHaveLength(1);
});
// Options nested under `xml` are expected to take effect: tags are lowercased and output is XML-style.
it('(html) : should handle lowercase tag options', () => {
const $ = load('<BODY><ul id="fruits"></ul></BODY>', {
xml: { lowerCaseTags: true },
});
expect($.html()).toBe('<body><ul id="fruits"/></body>');
});
// With `xml: true`, the content of <script> is expected to be parsed as markup (first child is a tag).
it('(html) : should handle xml tag option', () => {
const $ = load('<body><script><foo></script></body>', {
xml: true,
});
expect($('script')[0].children[0].type).toBe('tag');
});
// A Buffer is accepted as content and round-trips to the same markup.
it('(buffer) : should accept a buffer', () => {
const html = '<html><head></head><body>foo</body></html>';
const $html = load(Buffer.from(html));
expect($html.html()).toBe(html);
});
});

View File

@@ -0,0 +1,277 @@
import {
type CheerioOptions,
type InternalOptions,
flattenOptions,
} from './options.js';
import * as staticMethods from './static.js';
import { Cheerio } from './cheerio.js';
import { isHtml, isCheerio } from './utils.js';
import type { AnyNode, Document, Element, ParentNode } from 'domhandler';
import type { SelectorType, BasicAcceptedElems } from './types.js';
/** The shape of the static helpers module; these members are mixed into every `CheerioAPI`. */
type StaticType = typeof staticMethods;
/**
 * A querying function, bound to a document created from the provided markup.
 *
 * Also provides several helper methods for dealing with the document as a
 * whole.
 */
export interface CheerioAPI extends StaticType {
/**
 * This selector method is the starting point for traversing and manipulating
 * the document. Like jQuery, it's the primary method for selecting elements
 * in the document.
 *
 * `selector` searches within the `context` scope, which searches within the
 * `root` scope.
 *
 * @example
 *
 * ```js
 * $('ul .pear').attr('class');
 * //=> pear
 *
 * $('li[class=orange]').html();
 * //=> Orange
 *
 * $('.apple', '#fruits').text();
 * //=> Apple
 * ```
 *
 * Optionally, you can also load HTML by passing the string as the selector:
 *
 * ```js
 * $('<ul id="fruits">...</ul>');
 * ```
 *
 * Or the context:
 *
 * ```js
 * $('ul', '<ul id="fruits">...</ul>');
 * ```
 *
 * Or as the root:
 *
 * ```js
 * $('li', 'ul', '<ul id="fruits">...</ul>');
 * ```
 *
 * @param selector - Either a selector to look for within the document, or the
 * contents of a new Cheerio instance.
 * @param context - Either a selector to look for within the root, or the
 * contents of the document to query.
 * @param root - Optional HTML document string.
 * @param options - Optional options for this call; they are layered on top of
 * the options the document was loaded with.
 */
<T extends AnyNode, S extends string>(
selector?: S | BasicAcceptedElems<T>,
context?: BasicAcceptedElems<AnyNode> | null,
root?: BasicAcceptedElems<Document>,
options?: CheerioOptions,
): Cheerio<S extends SelectorType ? Element : T>;
/**
 * The root the document was originally loaded with.
 *
 * @private
 */
_root: Document;
/**
 * The options the document was originally loaded with.
 *
 * @private
 */
_options: InternalOptions;
/** Mimic jQuery's prototype alias for plugin authors. */
fn: typeof Cheerio.prototype;
/**
 * The `.load` static method defined on the "loaded" Cheerio factory function
 * is deprecated. Users are encouraged to instead use the `load` function
 * exported by the Cheerio module.
 *
 * @deprecated Use the `load` function exported by the Cheerio module.
 * @category Deprecated
 * @example
 *
 * ```js
 * const $ = cheerio.load('<h1>Hello, <span>world</span>.</h1>');
 * ```
 */
load: ReturnType<typeof getLoad>;
}
/**
 * Create a `load` function that uses the given parse and render
 * implementations.
 *
 * @param parse - Turns content (markup, buffer or nodes) into a document.
 * @param render - Serializes a node, or a list of nodes, to a string.
 * @returns The `load` function.
 */
export function getLoad(
parse: typeof Cheerio.prototype._parse,
render: (
dom: AnyNode | ArrayLike<AnyNode>,
options: InternalOptions,
) => string,
) {
/**
 * Create a querying function, bound to a document created from the provided
 * markup.
 *
 * Note that similar to web browser contexts, this operation may introduce
 * `<html>`, `<head>`, and `<body>` elements; set `isDocument` to `false` to
 * switch to fragment mode and disable this.
 *
 * @param content - Markup to be loaded.
 * @param options - Options for the created instance.
 * @param isDocument - Allows parser to be switched to fragment mode.
 * @returns The loaded document.
 * @throws {Error} If `content` is `null` or `undefined`.
 * @see {@link https://cheerio.js.org#loading} for additional usage information.
 */
return function load(
content: string | AnyNode | AnyNode[] | Buffer,
options?: CheerioOptions | null,
isDocument = true,
): CheerioAPI {
// `== null` rejects both `null` and `undefined`.
if ((content as string | null) == null) {
throw new Error('cheerio.load() expects a string');
}
const internalOpts = flattenOptions(options);
const initialRoot = parse(content, internalOpts, isDocument, null);
/**
 * Create an extended class here, so that extensions only live on one
 * instance.
 */
class LoadedCheerio<T> extends Cheerio<T> {
/** Create a new instance through `initialize`, remembering this instance as `prevObject`. */
_make<T>(
selector?: ArrayLike<T> | T | string,
context?: BasicAcceptedElems<AnyNode> | null,
): Cheerio<T> {
const cheerio = initialize(selector, context);
cheerio.prevObject = this;
return cheerio;
}
/** Delegate to the `parse` implementation given to `getLoad`. */
_parse(
content: string | Document | AnyNode | AnyNode[] | Buffer,
options: InternalOptions,
isDocument: boolean,
context: ParentNode | null,
) {
return parse(content, options, isDocument, context);
}
/** Delegate to the `render` implementation given to `getLoad`, using this instance's options. */
_render(dom: AnyNode | ArrayLike<AnyNode>): string {
return render(dom, this.options);
}
}
/**
 * The querying function returned to the caller as `$`.
 *
 * @param selector - A selector, markup, a node, an array of nodes, or a
 * Cheerio instance.
 * @param context - Where to search: a selector, markup, node(s), or a
 * Cheerio instance.
 * @param root - The root to use; defaults to the initially loaded document.
 * @param opts - Options layered on top of the options used for loading.
 * @returns The resulting Cheerio instance.
 * @throws {TypeError} If `selector` is neither a string, a node, nor an array.
 */
function initialize<T = AnyNode, S extends string = string>(
selector?: ArrayLike<T> | T | S,
context?: BasicAcceptedElems<AnyNode> | null,
root: BasicAcceptedElems<Document> = initialRoot,
opts?: CheerioOptions,
): Cheerio<S extends SelectorType ? Element : T> {
type Result = S extends SelectorType ? Element : T;
// $($)
if (selector && isCheerio<Result>(selector)) return selector;
const options = flattenOptions(opts, internalOpts);
// Normalize the root: parse markup, keep array-likes as-is, wrap a single node.
const r =
typeof root === 'string'
? [parse(root, options, false, null)]
: 'length' in root
? root
: [root];
const rootInstance = isCheerio<Document>(r)
? r
: new LoadedCheerio<Document>(r, null, options);
// Add a cyclic reference, so that calling methods on `_root` never fails.
rootInstance._root = rootInstance;
// $(), $(null), $(undefined), $(false)
if (!selector) {
return new LoadedCheerio<Result>(undefined, rootInstance, options);
}
// Resolve the selector to concrete nodes, if it is markup, a node, or an array.
const elements: AnyNode[] | undefined =
typeof selector === 'string' && isHtml(selector)
? // $(<html>)
parse(selector, options, false, null).children
: isNode(selector)
? // $(dom)
[selector]
: Array.isArray(selector)
? // $([dom])
selector
: undefined;
const instance = new LoadedCheerio(elements, rootInstance, options);
if (elements) {
return instance as any;
}
if (typeof selector !== 'string') {
throw new TypeError('Unexpected type of selector');
}
// We know that our selector is a string now.
let search = selector;
// A selector-string context is prepended to `search`; the search then starts at the root.
const searchContext: Cheerio<AnyNode> | undefined = context
? // If we don't have a context, maybe we have a root, from loading
typeof context === 'string'
? isHtml(context)
? // $('li', '<ul>...</ul>')
new LoadedCheerio<Document>(
[parse(context, options, false, null)],
rootInstance,
options,
)
: // $('li', 'ul')
((search = `${context} ${search}` as S), rootInstance)
: isCheerio<AnyNode>(context)
? // $('li', $)
context
: // $('li', node), $('li', [nodes])
new LoadedCheerio<AnyNode>(
Array.isArray(context) ? context : [context],
rootInstance,
options,
)
: rootInstance;
// If we still don't have a context, return
if (!searchContext) return instance as any;
/*
 * #id, .class, tag
 */
return searchContext.find(search) as Cheerio<Result>;
}
// Add in static methods & properties
Object.assign(initialize, staticMethods, {
load,
// `_root` and `_options` are used in static methods.
_root: initialRoot,
_options: internalOpts,
// Add `fn` for plugins
fn: LoadedCheerio.prototype,
// Add the prototype here to maintain `instanceof` behavior.
prototype: LoadedCheerio.prototype,
});
return initialize as CheerioAPI;
};
}
/**
 * Duck-type check for DOM nodes: anything with a truthy `name`, or whose
 * `type` is `'root'`, `'text'` or `'comment'`.
 *
 * @param obj - The value to check; must not be `null` or `undefined`.
 * @returns Whether `obj` looks like a node.
 */
function isNode(obj: any): obj is AnyNode {
  if (obj.name) {
    return true;
  }
  switch (obj.type) {
    case 'root':
    case 'text':
    case 'comment': {
      return true;
    }
    default: {
      return false;
    }
  }
}

View File

@@ -0,0 +1,135 @@
import type { DomHandlerOptions } from 'domhandler';
import type { ParserOptions as HTMLParser2ParserOptions } from 'htmlparser2';
import type { ParserOptions as Parse5ParserOptions } from 'parse5';
import type { Htmlparser2TreeAdapterMap } from 'parse5-htmlparser2-tree-adapter';
import type { Options as SelectOptions } from 'cheerio-select';
/**
 * Options accepted by htmlparser2, the default parser for XML.
 *
 * Combines the options of `domhandler` with the parser options of
 * `htmlparser2`.
 *
 * @see https://github.com/fb55/htmlparser2/wiki/Parser-options
 */
export interface HTMLParser2Options
extends DomHandlerOptions,
HTMLParser2ParserOptions {}
/**
 * Options accepted by Cheerio.
 *
 * Please note that parser-specific options are _only recognized_ if the
 * relevant parser is used.
 */
export interface CheerioOptions
extends Parse5ParserOptions<Htmlparser2TreeAdapterMap> {
/**
 * Recommended way of configuring htmlparser2 when wanting to parse XML.
 *
 * This will switch Cheerio to use htmlparser2. Pass `true` to use the
 * defaults, or an object with htmlparser2 options.
 *
 * @default false
 */
xml?: HTMLParser2Options | boolean;
/**
 * Enable xml mode, which will switch Cheerio to use htmlparser2.
 *
 * @deprecated Please use the `xml` option instead.
 * @default false
 */
xmlMode?: boolean;
/** The base URI for the document. Used to resolve the `href` and `src` props. */
baseURI?: string | URL;
/**
 * Is the document in quirks mode?
 *
 * This will lead to `.className` and `#id` being case-insensitive.
 *
 * @default false
 */
quirksMode?: SelectOptions['quirksMode'];
/**
 * Extension point for pseudo-classes.
 *
 * Maps from names to either strings or functions.
 *
 * - A string value is a selector that the element must match to be selected.
 * - A function is called with the element as its first argument, and optional
 * parameters second. If it returns true, the element is selected.
 *
 * @example
 *
 * ```js
 * const $ = cheerio.load(
 * '<div class="foo"></div><div data-bar="boo"></div>',
 * {
 * pseudos: {
 * // `:foo` is an alias for `div.foo`
 * foo: 'div.foo',
 * // `:bar(val)` is equivalent to `[data-bar=val s]`
 * bar: (el, val) => el.attribs['data-bar'] === val,
 * },
 * },
 * );
 *
 * $(':foo').length; // 1
 * $('div:bar(boo)').length; // 1
 * $('div:bar(baz)').length; // 0
 * ```
 */
pseudos?: SelectOptions['pseudos'];
}
/**
 * Internal options for Cheerio.
 *
 * The flattened form of `CheerioOptions`: the nested `xml` option is replaced
 * by the htmlparser2 options themselves, plus the `_useHtmlParser2` flag.
 */
export interface InternalOptions
extends HTMLParser2Options,
Omit<CheerioOptions, 'xml'> {
/**
 * Whether to use htmlparser2.
 *
 * `flattenOptions` sets this to true when either the `xml` or the `xmlMode`
 * option is truthy.
 */
_useHtmlParser2?: boolean;
}
/** Options used when neither options nor base options are supplied. */
const defaultOpts: InternalOptions = {
  _useHtmlParser2: false,
};

/**
 * Turn user-facing options into the internal, flat representation.
 *
 * Without `options`, the base options (or the defaults) are returned as-is.
 * Otherwise a new object is built from the base options and `options`; a
 * truthy `xml` or `xmlMode` enables `_useHtmlParser2`, and the members of an
 * `xml` object are copied onto the result.
 *
 * @param options - The options to flatten.
 * @param baseOptions - The base options to use.
 * @returns The flattened options.
 */
export function flattenOptions(
  options?: CheerioOptions | null,
  baseOptions?: InternalOptions,
): InternalOptions {
  if (!options) {
    return baseOptions ?? defaultOpts;
  }

  const { xml, xmlMode } = options;

  const flattened: InternalOptions = {
    _useHtmlParser2: Boolean(xmlMode),
    ...baseOptions,
    ...options,
  };

  if (!xml) {
    // Only the deprecated flag may have been used.
    if (xmlMode) {
      flattened._useHtmlParser2 = true;
    }
    return flattened;
  }

  flattened._useHtmlParser2 = true;
  flattened.xmlMode = true;

  // An object carries htmlparser2 options; hoist them to the top level.
  if (xml !== true) {
    Object.assign(flattened, xml);
  }

  return flattened;
}

View File

@@ -0,0 +1,452 @@
import { describe, it, expect } from 'vitest';
import type { Document, Element } from 'domhandler';
import { getParse } from './parse.js';
import { parseDocument as parseWithHtmlparser2 } from 'htmlparser2';
import { parseWithParse5 } from './parsers/parse5-adapter.js';
/** Options used by most tests: parse with parse5. */
const defaultOpts = { _useHtmlParser2: false };

/** The parse function under test, dispatching on the `_useHtmlParser2` flag. */
const parse = getParse((content, options, isDocument, context) => {
  if (options._useHtmlParser2) {
    return parseWithHtmlparser2(content, options);
  }
  return parseWithParse5(content, options, isDocument, context);
});
/*
 * Markup fixtures used by the tests below, grouped by the feature they
 * exercise.
 */
// Tags
const basic = '<html></html>';
const siblings = '<h2></h2><p></p>';
// Single Tags (self-closed, and the same tag without the closing slash)
const single = '<br/>';
const singleWrong = '<br>';
// Children
const children = '<html><br/></html>';
const li = '<li class="durian">Durian</li>';
// Attributes (with values, and value-less)
const attributes = '<img src="hello.png" alt="man waving">';
const noValueAttribute = '<textarea disabled></textarea>';
// Comments (plain, and an IE conditional comment)
const comment = '<!-- sexy -->';
const conditional =
'<!--[if IE 8]><html class="no-js ie8" lang="en"><![endif]-->';
// Text
const text = 'lorem ipsum';
// Script (with content, and empty)
const script = '<script type="text/javascript">alert("hi world!");</script>';
const scriptEmpty = '<script></script>';
// Style (with content, and empty)
const style = '<style type="text/css"> h2 { color:blue; } </style>';
const styleEmpty = '<style></style>';
// Directives
const directive = '<!doctype html>';
/**
 * Shared assertions for a parsed root: it is of type `root`, has no siblings
 * or parent, and its first child points back to it.
 *
 * @param root - The document returned by `parse`.
 */
function rootTest(root: Document) {
  expect(root).toHaveProperty('type', 'root');

  const { nextSibling, previousSibling, parentNode, childNodes } = root;
  expect(nextSibling).toBe(null);
  expect(previousSibling).toBe(null);
  expect(parentNode).toBe(null);

  const firstChild = childNodes[0];
  expect(firstChild.parentNode).toBe(root);
}
describe('parse', () => {
// Checks the node structure produced for each fixture, using parse5 (`defaultOpts`).
describe('evaluate', () => {
// Document mode: the <html> element is expected to have two children.
it(`should parse basic empty tags: ${basic}`, () => {
const [tag] = parse(basic, defaultOpts, true, null).children as Element[];
expect(tag.type).toBe('tag');
expect(tag.tagName).toBe('html');
expect(tag.childNodes).toHaveLength(2);
});
it(`should handle sibling tags: ${siblings}`, () => {
const dom = parse(siblings, defaultOpts, false, null)
.children as Element[];
const [h2, p] = dom;
expect(dom).toHaveLength(2);
expect(h2.tagName).toBe('h2');
expect(p.tagName).toBe('p');
});
it(`should handle single tags: ${single}`, () => {
const [tag] = parse(single, defaultOpts, false, null)
.children as Element[];
expect(tag.type).toBe('tag');
expect(tag.tagName).toBe('br');
expect(tag.childNodes).toHaveLength(0);
});
it(`should handle malformatted single tags: ${singleWrong}`, () => {
const [tag] = parse(singleWrong, defaultOpts, false, null)
.children as Element[];
expect(tag.type).toBe('tag');
expect(tag.tagName).toBe('br');
expect(tag.childNodes).toHaveLength(0);
});
// Document mode: the <br/> is expected inside <body>, the second child of <html>.
it(`should handle tags with children: ${children}`, () => {
const [tag] = parse(children, defaultOpts, true, null)
.children as Element[];
expect(tag.type).toBe('tag');
expect(tag.tagName).toBe('html');
expect(tag.childNodes).toBeTruthy();
expect(tag.childNodes[1]).toHaveProperty('tagName', 'body');
expect((tag.childNodes[1] as Element).childNodes).toHaveLength(1);
});
it(`should handle tags with children: ${li}`, () => {
const [tag] = parse(li, defaultOpts, false, null).children as Element[];
expect(tag.childNodes).toHaveLength(1);
expect(tag.childNodes[0]).toHaveProperty('data', 'Durian');
});
it(`should handle tags with attributes: ${attributes}`, () => {
const attrs = parse(attributes, defaultOpts, false, null)
.children[0] as Element;
expect(attrs.attribs).toBeTruthy();
expect(attrs.attribs).toHaveProperty('src', 'hello.png');
expect(attrs.attribs).toHaveProperty('alt', 'man waving');
});
// A value-less attribute is expected to be stored as an empty string.
it(`should handle value-less attributes: ${noValueAttribute}`, () => {
const attrs = parse(noValueAttribute, defaultOpts, false, null)
.children[0] as Element;
expect(attrs.attribs).toBeTruthy();
expect(attrs.attribs).toHaveProperty('disabled', '');
});
it(`should handle comments: ${comment}`, () => {
const elem = parse(comment, defaultOpts, false, null).children[0];
expect(elem.type).toBe('comment');
expect(elem).toHaveProperty('data', ' sexy ');
});
// The conditional comment is expected to stay a single comment node with its markup as data.
it(`should handle conditional comments: ${conditional}`, () => {
const elem = parse(conditional, defaultOpts, false, null).children[0];
expect(elem.type).toBe('comment');
expect(elem).toHaveProperty(
'data',
conditional.replace('<!--', '').replace('-->', ''),
);
});
it(`should handle text: ${text}`, () => {
const text_ = parse(text, defaultOpts, false, null).children[0];
expect(text_.type).toBe('text');
expect(text_).toHaveProperty('data', 'lorem ipsum');
});
// <script> elements get their own node type; their content is a single text node.
it(`should handle script tags: ${script}`, () => {
const script_ = parse(script, defaultOpts, false, null)
.children[0] as Element;
expect(script_.type).toBe('script');
expect(script_.tagName).toBe('script');
expect(script_.attribs).toHaveProperty('type', 'text/javascript');
expect(script_.childNodes).toHaveLength(1);
expect(script_.childNodes[0].type).toBe('text');
expect(script_.childNodes[0]).toHaveProperty(
'data',
'alert("hi world!");',
);
});
// <style> elements get their own node type; their content is a single text node.
it(`should handle style tags: ${style}`, () => {
const style_ = parse(style, defaultOpts, false, null)
.children[0] as Element;
expect(style_.type).toBe('style');
expect(style_.tagName).toBe('style');
expect(style_.attribs).toHaveProperty('type', 'text/css');
expect(style_.childNodes).toHaveLength(1);
expect(style_.childNodes[0].type).toBe('text');
expect(style_.childNodes[0]).toHaveProperty(
'data',
' h2 { color:blue; } ',
);
});
// The doctype is expected as a directive node with normalized data.
it(`should handle directives: ${directive}`, () => {
const elem = parse(directive, defaultOpts, true, null).children[0];
expect(elem.type).toBe('directive');
expect(elem).toHaveProperty('data', '!DOCTYPE html');
expect(elem).toHaveProperty('name', '!doctype');
});
});
// Checks the root node returned by `parse`, plus a set of tricky-markup cases.
describe('.parse', () => {
// Root test utility
it(`should add root to: ${basic}`, () => {
const root = parse(basic, defaultOpts, true, null);
rootTest(root);
expect(root.childNodes).toHaveLength(1);
expect(root.childNodes[0]).toHaveProperty('tagName', 'html');
});
it(`should add root to: ${siblings}`, () => {
const root = parse(siblings, defaultOpts, false, null);
rootTest(root);
expect(root.childNodes).toHaveLength(2);
expect(root.childNodes[0]).toHaveProperty('tagName', 'h2');
expect(root.childNodes[1]).toHaveProperty('tagName', 'p');
expect(root.childNodes[1].parent).toBe(root);
});
it(`should add root to: ${comment}`, () => {
const root = parse(comment, defaultOpts, false, null);
rootTest(root);
expect(root.childNodes).toHaveLength(1);
expect(root.childNodes[0].type).toBe('comment');
});
it(`should add root to: ${text}`, () => {
const root = parse(text, defaultOpts, false, null);
rootTest(root);
expect(root.childNodes).toHaveLength(1);
expect(root.childNodes[0].type).toBe('text');
});
it(`should add root to: ${scriptEmpty}`, () => {
const root = parse(scriptEmpty, defaultOpts, false, null);
rootTest(root);
expect(root.childNodes).toHaveLength(1);
expect(root.childNodes[0].type).toBe('script');
});
it(`should add root to: ${styleEmpty}`, () => {
const root = parse(styleEmpty, defaultOpts, false, null);
rootTest(root);
expect(root.childNodes).toHaveLength(1);
expect(root.childNodes[0].type).toBe('style');
});
// Document mode: the root is expected to hold the directive plus one more node.
it(`should add root to: ${directive}`, () => {
const root = parse(directive, defaultOpts, true, null);
rootTest(root);
expect(root.childNodes).toHaveLength(2);
expect(root.childNodes[0].type).toBe('directive');
});
// Passing an already parsed document must return that same object.
it('should simply return root', () => {
const oldroot = parse(basic, defaultOpts, true, null);
const root = parse(oldroot, defaultOpts, true, null);
expect(root).toBe(oldroot);
rootTest(root);
expect(root.childNodes).toHaveLength(1);
expect(root.childNodes[0]).toHaveProperty('tagName', 'html');
});
// Verifies the parent/child/sibling links of a <div> with three empty children.
it('should expose the DOM level 1 API', () => {
const root = parse(
'<div><a></a><span></span><p></p></div>',
defaultOpts,
false,
null,
).childNodes[0] as Element;
const childNodes = root.childNodes as Element[];
expect(childNodes).toHaveLength(3);
expect(root.tagName).toBe('div');
expect(root.firstChild).toBe(childNodes[0]);
expect(root.lastChild).toBe(childNodes[2]);
expect(childNodes[0].tagName).toBe('a');
expect(childNodes[0].previousSibling).toBe(null);
expect(childNodes[0].nextSibling).toBe(childNodes[1]);
expect(childNodes[0].parentNode).toBe(root);
expect((childNodes[0] as Element).childNodes).toHaveLength(0);
expect(childNodes[0].firstChild).toBe(null);
expect(childNodes[0].lastChild).toBe(null);
expect(childNodes[1].tagName).toBe('span');
expect(childNodes[1].previousSibling).toBe(childNodes[0]);
expect(childNodes[1].nextSibling).toBe(childNodes[2]);
expect(childNodes[1].parentNode).toBe(root);
expect(childNodes[1].childNodes).toHaveLength(0);
expect(childNodes[1].firstChild).toBe(null);
expect(childNodes[1].lastChild).toBe(null);
expect(childNodes[2].tagName).toBe('p');
expect(childNodes[2].previousSibling).toBe(childNodes[1]);
expect(childNodes[2].nextSibling).toBe(null);
expect(childNodes[2].parentNode).toBe(root);
expect(childNodes[2].childNodes).toHaveLength(0);
expect(childNodes[2].firstChild).toBe(null);
expect(childNodes[2].lastChild).toBe(null);
});
// A literal `<=` between elements is expected to survive as text.
it('Should parse less than or equal sign sign', () => {
const root = parse('<i>A</i><=<i>B</i>', defaultOpts, false, null);
const { childNodes } = root;
expect(childNodes[0]).toHaveProperty('tagName', 'i');
expect((childNodes[0] as Element).childNodes[0]).toHaveProperty(
'data',
'A',
);
expect(childNodes[1]).toHaveProperty('data', '<=');
expect(childNodes[2]).toHaveProperty('tagName', 'i');
expect((childNodes[2] as Element).childNodes[0]).toHaveProperty(
'data',
'B',
);
});
// An unclosed CDATA marker inside <script> is expected to stay part of the script text.
it('Should ignore unclosed CDATA', () => {
const root = parse(
'<a></a><script>foo //<![CDATA[ bar</script><b></b>',
defaultOpts,
false,
null,
);
const childNodes = root.childNodes as Element[];
expect(childNodes[0].tagName).toBe('a');
expect(childNodes[1].tagName).toBe('script');
expect(childNodes[1].childNodes[0]).toHaveProperty(
'data',
'foo //<![CDATA[ bar',
);
expect(childNodes[2].tagName).toBe('b');
});
it('Should add <head> to documents', () => {
const root = parse('<html></html>', defaultOpts, true, null);
const childNodes = root.childNodes as Element[];
expect(childNodes[0].tagName).toBe('html');
expect(childNodes[0].childNodes[0]).toHaveProperty('tagName', 'head');
});
// Expected structure: table > tbody > tr > td > text, although the input has no <tbody>/<tr> start tags.
it('Should implicitly create <tr> around <td>', () => {
const root = parse(
'<table><td>bar</td></tr></table>',
defaultOpts,
false,
null,
);
const childNodes = root.childNodes as Element[];
expect(childNodes[0].tagName).toBe('table');
expect(childNodes[0].childNodes.length).toBe(1);
expect(childNodes[0].childNodes[0]).toHaveProperty('tagName', 'tbody');
expect((childNodes[0] as any).childNodes[0].childNodes[0]).toHaveProperty(
'tagName',
'tr',
);
expect(
(childNodes[0] as any).childNodes[0].childNodes[0].childNodes[0]
.tagName,
).toBe('td');
expect(
(childNodes[0] as any).childNodes[0].childNodes[0].childNodes[0]
.childNodes[0].data,
).toBe('bar');
});
it('Should parse custom tag <line>', () => {
const root = parse('<line>test</line>', defaultOpts, false, null);
const childNodes = root.childNodes as Element[];
expect(childNodes.length).toBe(1);
expect(childNodes[0].tagName).toBe('line');
expect(childNodes[0].childNodes[0]).toHaveProperty('data', 'test');
});
// Stray </td> end tags must not change the result: three rows, each with one cell.
it('Should properly parse misnested table tags', () => {
const root = parse(
'<tr><td>i1</td></tr><tr><td>i2</td></td></tr><tr><td>i3</td></td></tr>',
defaultOpts,
false,
null,
);
const childNodes = root.childNodes as Element[];
expect(childNodes.length).toBe(3);
for (let i = 0; i < childNodes.length; i++) {
const child = childNodes[i];
expect(child.tagName).toBe('tr');
expect(child.childNodes[0]).toHaveProperty('tagName', 'td');
expect((child.childNodes[0] as Element).childNodes[0]).toHaveProperty(
'data',
`i${i + 1}`,
);
}
});
// The attribute value contains quotes, semicolons and a data URL; it must be kept verbatim.
it('Should correctly parse data url attributes', () => {
const html =
'<div style=\'font-family:"butcherman-caps"; src:url(data:font/opentype;base64,AAEA...);\'></div>';
const expectedAttr =
'font-family:"butcherman-caps"; src:url(data:font/opentype;base64,AAEA...);';
const root = parse(html, defaultOpts, false, null);
const childNodes = root.childNodes as Element[];
expect(childNodes[0].attribs).toHaveProperty('style', expectedAttr);
});
it('Should treat <xmp> tag content as text', () => {
const root = parse('<xmp><h2></xmp>', defaultOpts, false, null);
const childNodes = root.childNodes as Element[];
expect(childNodes[0].childNodes[0]).toHaveProperty('data', '<h2>');
});
// An incomplete numeric entity (`&#`) is expected to be kept as literal text.
it('Should correctly parse malformed numbered entities', () => {
const root = parse('<p>z&#</p>', defaultOpts, false, null);
const childNodes = root.childNodes as Element[];
expect(childNodes[0].childNodes[0]).toHaveProperty('data', 'z&#');
});
// The mismatched </h3> is expected to close the <h2>, leaving the <div> as a sibling.
it('Should correctly parse mismatched headings', () => {
const root = parse('<h2>Test</h3><div></div>', defaultOpts, false, null);
const { childNodes } = root;
expect(childNodes.length).toBe(2);
expect(childNodes[0]).toHaveProperty('tagName', 'h2');
expect(childNodes[1]).toHaveProperty('tagName', 'div');
});
// The newline right after <pre> is expected to be dropped; the `<-` must stay text.
it('Should correctly parse tricky <pre> content', () => {
const root = parse(
'<pre>\nA <- factor(A, levels = c("c","a","b"))\n</pre>',
defaultOpts,
false,
null,
);
const childNodes = root.childNodes as Element[];
expect(childNodes.length).toBe(1);
expect(childNodes[0].tagName).toBe('pre');
expect(childNodes[0].childNodes[0]).toHaveProperty(
'data',
'A <- factor(A, levels = c("c","a","b"))\n',
);
});
// `sourceCodeLocationInfo` must reach parse5, so nodes carry a `sourceCodeLocation`.
it('should pass the options for including the location info to parse5', () => {
const root = parse(
'<p>Hello</p>',
{ ...defaultOpts, sourceCodeLocationInfo: true },
false,
null,
);
const location = root.children[0].sourceCodeLocation;
expect(typeof location).toBe('object');
expect(location?.endOffset).toBe(12);
});
});
});

View File

@@ -0,0 +1,105 @@
import { removeElement } from 'domutils';
import {
type AnyNode,
Document,
type ParentNode,
isDocument as checkIsDocument,
} from 'domhandler';
import type { InternalOptions } from './options.js';
/**
 * Build a `parse` function on top of the given string parser.
 *
 * @param parser - Parses a markup string into a document.
 * @returns A parse function that also accepts buffers, nodes and documents.
 */
export function getParse(
  parser: (
    content: string,
    options: InternalOptions,
    isDocument: boolean,
    context: ParentNode | null,
  ) => Document,
) {
  /**
   * Turn the given content into a document.
   *
   * Buffers are converted to strings and strings are handed to the parser.
   * An existing document is returned unchanged; any other node, or array of
   * nodes, is placed under a newly created root.
   *
   * @param content - The HTML string, buffer, or node(s).
   * @param options - The parser options.
   * @param isDocument - If `content` is a document.
   * @param context - The context node in the DOM tree.
   * @returns The parsed document node.
   */
  return function parse(
    content: string | Document | AnyNode | AnyNode[] | Buffer,
    options: InternalOptions,
    isDocument: boolean,
    context: ParentNode | null,
  ): Document {
    // `Buffer` is not available in every environment, hence the `typeof` guard.
    const input =
      typeof Buffer !== 'undefined' && Buffer.isBuffer(content)
        ? content.toString()
        : content;

    if (typeof input === 'string') {
      return parser(input, options, isDocument, context);
    }

    const nodes = input as AnyNode | AnyNode[] | Document;

    // An existing root is passed through untouched.
    if (!Array.isArray(nodes) && checkIsDocument(nodes)) {
      return nodes;
    }

    // Add the content to a new root element and update the DOM links.
    const root = new Document([]);
    update(nodes, root);
    return root;
  };
}
/**
 * Re-link one layer of the DOM: make the given nodes the children of
 * `parent` and fix up their `prev`, `next` and `parent` references.
 *
 * Nodes that still belong to a different child list are removed from it
 * first. Without a parent, the nodes end up fully detached.
 *
 * @param newChilds - The new children.
 * @param parent - The new parent.
 * @returns The parent node.
 */
export function update(
  newChilds: AnyNode[] | AnyNode,
  parent: ParentNode | null,
): ParentNode | null {
  // Normalize to an array, and a missing parent to `null`.
  const nodes = Array.isArray(newChilds) ? newChilds : [newChilds];
  const owner = parent ? parent : null;

  if (owner) {
    owner.children = nodes;
  }

  for (const [index, node] of nodes.entries()) {
    // Cleanly remove existing nodes from their previous structures.
    if (node.parent && node.parent.children !== nodes) {
      removeElement(node);
    }

    // Siblings only exist when there is a parent holding the list.
    node.prev = owner ? nodes[index - 1] || null : null;
    node.next = owner ? nodes[index + 1] || null : null;
    node.parent = owner;
  }

  return owner;
}

View File

@@ -0,0 +1,66 @@
import {
type AnyNode,
type Document,
type ParentNode,
isDocument,
} from 'domhandler';
import { parse as parseDocument, parseFragment, serializeOuter } from 'parse5';
import { adapter as htmlparser2Adapter } from 'parse5-htmlparser2-tree-adapter';
import type { InternalOptions } from '../options.js';
/**
 * Parse markup with `parse5`, either as a full document or as a fragment
 * within the given context node.
 *
 * Note that `options` is filled in with defaults: the htmlparser2 tree
 * adapter when no `treeAdapter` is set, and `scriptingEnabled: true` unless
 * it was explicitly set to `false`.
 *
 * @param content - The content to parse.
 * @param options - A set of options to use to parse.
 * @param isDocument - Whether to parse the content as a full HTML document.
 * @param context - The context in which to parse the content.
 * @returns The parsed content.
 */
export function parseWithParse5(
  content: string,
  options: InternalOptions,
  isDocument: boolean,
  context: ParentNode | null,
): Document {
  options.treeAdapter ??= htmlparser2Adapter;

  if (options.scriptingEnabled !== false) {
    options.scriptingEnabled = true;
  }

  if (isDocument) {
    return parseDocument(content, options);
  }

  return parseFragment(context, content, options);
}
/** Serializer options: the nodes use the htmlparser2 tree adapter. */
const renderOpts = { treeAdapter: htmlparser2Adapter };

/**
 * Renders the given DOM tree with `parse5` and returns the result as a string.
 *
 * The input is only read: an array-like passed as `dom` (for example a
 * Cheerio instance) is not modified by rendering.
 *
 * @param dom - The DOM tree to render.
 * @returns The rendered document.
 */
export function renderWithParse5(dom: AnyNode | ArrayLike<AnyNode>): string {
  const nodes = 'length' in dom ? dom : [dom];

  let result = '';

  for (let index = 0; index < nodes.length; index += 1) {
    const node = nodes[index];

    if (isDocument(node)) {
      /*
       * `dom-serializer` passes over the special "root" node and renders the
       * node's children in its place. To mimic this behavior with `parse5`,
       * the children are serialized instead of the root itself.
       */
      for (const child of node.children) {
        result += serializeOuter(child, renderOpts);
      }
    } else {
      result += serializeOuter(node, renderOpts);
    }
  }

  return result;
}

View File

@@ -0,0 +1,33 @@
/**
* @file Alternative entry point for Cheerio that always uses htmlparser2. This
* way, parse5 won't be loaded, saving some memory.
*/
import { type CheerioAPI, getLoad } from './load.js';
import { type CheerioOptions } from './options.js';
import { getParse } from './parse.js';
import type { AnyNode } from 'domhandler';
import render from 'dom-serializer';
import { parseDocument } from 'htmlparser2';
export { contains, merge } from './static.js';
export type * from './types.js';
export type { Cheerio } from './cheerio.js';
export type { CheerioOptions, HTMLParser2Options } from './options.js';
export type { CheerioAPI } from './load.js';
/**
* Create a querying function, bound to a document created from the provided
* markup.
*
* This entry point wires the loader to `htmlparser2`'s `parseDocument` for
* parsing and to `dom-serializer` for rendering, so `parse5` is never
* imported from here.
*
* @param content - Markup to be loaded.
* @param options - Options for the created instance.
* @param isDocument - Always `false` here, as we are always using
* `htmlparser2`.
* @returns The loaded document.
* @see {@link https://cheerio.js.org#loading} for additional usage information.
*/
export const load: (
content: string | AnyNode | AnyNode[] | Buffer,
options?: CheerioOptions | null,
isDocument?: boolean,
// Parser: `getParse` wrapped around htmlparser2's `parseDocument`; renderer: dom-serializer.
) => CheerioAPI = getLoad(getParse(parseDocument), render);

View File

@@ -0,0 +1,325 @@
import { describe, it, expect, beforeEach } from 'vitest';
import { cheerio, food, eleven } from './__fixtures__/fixtures.js';
import { type CheerioAPI } from './index.js';
describe('cheerio', () => {
describe('.html', () => {
  it('() : should return innerHTML; $.html(obj) should return outerHTML', () => {
    const markup = '<div><span>foo</span><span>bar</span></div>';
    const secondSpan = cheerio('div', markup).children()[1];

    // The instance method renders the contents, the static one the node itself.
    expect(cheerio(secondSpan).html()).toBe('bar');
    expect(cheerio.html(secondSpan)).toBe('<span>bar</span>');
  });

  it('(<obj>) : should accept an object, an array, or a cheerio object', () => {
    const rendered = '<span>foo</span>';
    const $wrapped = cheerio('<span>foo</span>');

    expect(cheerio.html($wrapped[0])).toBe(rendered);
    expect(cheerio.html($wrapped)).toBe(rendered);
  });

  it('(<value>) : should be able to set to an empty string', () => {
    const $emptied = cheerio('<span>foo</span>').html('');

    expect(cheerio.html($emptied)).toBe('<span></span>');
  });

  it('(<root>) : does not render the root element', () => {
    const $ = cheerio.load('');
    const $root = $.root();

    expect(cheerio.html($root)).toBe('<html><head></head><body></body></html>');
  });

  it('(<elem>, <root>, <elem>) : does not render the root element', () => {
    const fragment = '<div>a div</div><span>a span</span>';
    const $ = cheerio.load(fragment);
    const $mixed = $('div').add($.root()).add('span');

    // The root contributes the whole document, followed by the two elements.
    expect(cheerio.html($mixed)).toBe(
      `<html><head></head><body>${fragment}</body></html>${fragment}`,
    );
  });

  it('() : does not crash with `null` as `this` value', () => {
    const unboundHtml = cheerio.html;

    expect(unboundHtml.call(null as never)).toBe('');
    expect(unboundHtml.call(null as never, '#nothing')).toBe('');
  });
});
describe('.text', () => {
  it('(cheerio object) : should return the text contents of the specified elements', () => {
    const $ = cheerio.load('<a>This is <em>content</em>.</a>');
    expect(cheerio.text($('a'))).toBe('This is content.');
  });
  it('(cheerio object) : should omit comment nodes', () => {
    const $ = cheerio.load(
      '<a>This is <!-- a comment --> not a comment.</a>',
    );
    expect(cheerio.text($('a'))).toBe('This is not a comment.');
  });
  it('(cheerio object) : should include text contents of children recursively', () => {
    const $ = cheerio.load(
      '<a>This is <div>a child with <span>another child and <!-- a comment --> not a comment</span> followed by <em>one last child</em> and some final</div> text.</a>',
    );
    expect(cheerio.text($('a'))).toBe(
      'This is a child with another child and not a comment followed by one last child and some final text.',
    );
  });
  it('() : should return the rendered text content of the root', () => {
    const $ = cheerio.load(
      '<a>This is <div>a child with <span>another child and <!-- a comment --> not a comment</span> followed by <em>one last child</em> and some final</div> text.</a>',
    );
    expect(cheerio.text($.root())).toBe(
      'This is a child with another child and not a comment followed by one last child and some final text.',
    );
  });
  it('(cheerio object) : should not omit script tags', () => {
    const $ = cheerio.load('<script>console.log("test")</script>');
    expect(cheerio.text($.root())).toBe('console.log("test")');
  });
  // The assertion expects the stylesheet text to be present in the output, so
  // the title says "not omit" (it previously claimed the opposite).
  it('(cheerio object) : should not omit style tags', () => {
    const $ = cheerio.load(
      '<style type="text/css">.cf-hidden { display: none; }</style>',
    );
    expect($.text()).toBe('.cf-hidden { display: none; }');
  });
  it('() : does not crash with `null` as `this` value', () => {
    const { text } = cheerio;
    expect(text.call(null as never)).toBe('');
  });
});
describe('.parseHTML', () => {
  const $ = cheerio.load('');
  // Markup shared by the script-handling cases.
  const scriptOnly = '<script>undefined()</script>';
  const scriptThenDiv = '<script>undefined()</script><div></div>';

  it('() : returns null', () => {
    const result = $.parseHTML();
    expect(result).toBe(null);
  });

  it('(null) : returns null', () => {
    const result = $.parseHTML(null);
    expect(result).toBe(null);
  });

  it('("") : returns null', () => {
    const result = $.parseHTML('');
    expect(result).toBe(null);
  });

  it('(largeHtmlString) : parses large HTML strings', () => {
    const parsed = $.parseHTML('<div></div>'.repeat(10));

    expect(parsed.length).toBe(10);
    expect(parsed).toBeInstanceOf(Array);
  });

  it('("<script>") : ignores scripts by default', () => {
    expect($.parseHTML(scriptOnly)).toHaveLength(0);
  });

  it('("<script>", true) : preserves scripts when requested', () => {
    const [first] = $.parseHTML(scriptOnly, true);
    expect(first).toHaveProperty('tagName', 'script');
  });

  it('("scriptAndNonScript) : preserves non-script nodes', () => {
    const [first] = $.parseHTML(scriptThenDiv);
    expect(first).toHaveProperty('tagName', 'div');
  });

  it('(scriptAndNonScript, true) : Preserves script position', () => {
    const [first] = $.parseHTML(scriptThenDiv, true);
    expect(first).toHaveProperty('tagName', 'script');
  });

  it('(text) : returns a text node', () => {
    const [first] = $.parseHTML('text');
    expect(first.type).toBe('text');
  });

  it('(<tab>>text) : preserves leading whitespace', () => {
    const [first] = $.parseHTML('\t<div></div>');
    expect(first).toHaveProperty('data', '\t');
  });

  it('( text) : Leading spaces are treated as text nodes', () => {
    const [first] = $.parseHTML(' <div/> ');
    expect(first.type).toBe('text');
  });

  it('(html) : should preserve content', () => {
    const [div] = $.parseHTML('<div>test div</div>');
    expect(cheerio(div).html()).toBe('test div');
  });

  it('(malformedHtml) : should not break', () => {
    const parsed = $.parseHTML('<span><span>');
    expect(parsed).toHaveLength(1);
  });

  it('(garbageInput) : should not cause an error', () => {
    const parsed = $.parseHTML('<#if><tr><p>This is a test.</p></tr><#/if>');
    expect(parsed).toBeTruthy();
  });

  it('(text) : should return an array that is not effected by DOM manipulation methods', () => {
    const $doc = cheerio.load('<div>');
    const parsed = $doc.parseHTML('<b></b><i></i>');

    // Moving the nodes into the document must not shrink the returned array.
    $doc('div').append(parsed);
    expect(parsed).toHaveLength(2);
  });

  it('(html, context) : should ignore context argument', () => {
    const $doc = cheerio.load('<div>');
    const parsed = $doc.parseHTML('<script>foo</script><a>', { foo: 123 });

    $doc('div').append(parsed);
    expect(parsed).toHaveLength(1);
  });

  it('(html, context, keepScripts) : should ignore context argument', () => {
    const $doc = cheerio.load('<div>');
    const ignoredContext = { foo: 123 };
    const parsed = $doc.parseHTML('<script>foo</script><a>', ignoredContext, true);

    $doc('div').append(parsed);
    expect(parsed).toHaveLength(2);
  });
});
describe('.merge', () => {
  const $ = cheerio.load('');

  it('should be a function', () => {
    expect(typeof $.merge).toBe('function');
  });

  it('(arraylike, arraylike) : should modify the first array, but not the second', () => {
    const target = [1, 2, 3];
    const source = [4, 5, 6];

    const merged = $.merge(target, source);

    expect(typeof merged).toBe('object');
    expect(Array.isArray(merged)).toBe(true);
    // The very same array instance is handed back.
    expect(merged).toBe(target);
    expect(target).toHaveLength(6);
    expect(source).toHaveLength(3);
  });

  it('(arraylike, arraylike) : should handle objects that arent arrays, but are arraylike', () => {
    const target: ArrayLike<string> = { length: 3, 0: 'a', 1: 'b', 2: 'c' };
    const source = { length: 3, 0: 'd', 1: 'e', 2: 'f' };

    $.merge(target, source);

    expect(target).toHaveLength(6);
    expect(target[3]).toBe('d');
    expect(target[4]).toBe('e');
    expect(target[5]).toBe('f');
    expect(source).toHaveLength(3);
  });

  it('(?, ?) : should gracefully reject invalid inputs', () => {
    // Claims three entries but has a hole at index 2.
    const sparse = { length: 3, 0: 'a', 1: 'b', 3: 'd' };

    expect($.merge([4], 3 as never)).toBeFalsy();
    expect($.merge({} as never, {} as never)).toBeFalsy();
    expect($.merge([], {} as never)).toBeFalsy();
    expect($.merge({} as never, [])).toBeFalsy();
    expect($.merge(sparse, [])).toBeFalsy();
    expect($.merge([], sparse)).toBeFalsy();
    expect($.merge({ length: '7' } as never, [])).toBeFalsy();
    expect($.merge({ length: -1 }, [])).toBeFalsy();
  });

  it('(?, ?) : should no-op on invalid inputs', () => {
    const sparse = { length: 3, 0: 'a', 1: 'b', 3: 'd' };
    const expectUntouched = (): void => {
      expect(sparse).toHaveLength(3);
      expect(sparse[0]).toBe('a');
      expect(sparse[1]).toBe('b');
      expect(sparse[3]).toBe('d');
    };

    $.merge(sparse, []);
    expectUntouched();

    $.merge([], sparse);
    expectUntouched();
  });
});
describe('.contains', () => {
  let $: CheerioAPI;

  // Every case starts from a freshly parsed copy of the food fixture.
  beforeEach(() => {
    $ = cheerio.load(food);
  });

  it('(container, contained) : should correctly detect the provided element', () => {
    const foodList = $('#food')[0];
    const fruitList = $('#fruits')[0];
    const apple = $('.apple')[0];

    expect($.contains(foodList, fruitList)).toBe(true);
    expect($.contains(foodList, apple)).toBe(true);
  });

  it('(container, other) : should not detect elements that are not contained', () => {
    const fruitList = $('#fruits')[0];
    const vegetableList = $('#vegetables')[0];
    const apple = $('.apple')[0];

    expect($.contains(vegetableList, apple)).toBe(false);
    expect($.contains(fruitList, vegetableList)).toBe(false);
    expect($.contains(vegetableList, fruitList)).toBe(false);
    // An element never contains itself.
    expect($.contains(fruitList, fruitList)).toBe(false);
    expect($.contains(vegetableList, vegetableList)).toBe(false);
  });
});
describe('.root', () => {
  it('() : should return a cheerio-wrapped root object', () => {
    const $ = cheerio.load('<html><head></head><body>foo</body></html>');
    const $root = $.root();

    // Appending to the root places the new element after the `<html>` element.
    $root.append('<div id="test"></div>');

    const rendered = $.html();
    expect(rendered).toBe(
      '<html><head></head><body>foo</body></html><div id="test"></div>',
    );
  });
});
describe('.extract', () => {
  it('() : should extract values for selectors', () => {
    const $ = cheerio.load(eleven);

    const extracted = $.extract({
      red: [{ selector: '.red', value: 'outerHTML' }],
    });

    // One entry per `.red` element, in document order.
    const expectedRed = [
      '<li class="red">Four</li>',
      '<li class="red">Five</li>',
      '<li class="red sel">Nine</li>',
    ];
    expect(extracted).toStrictEqual({ red: expectedRed });
  });
});
});

View File

@@ -0,0 +1,312 @@
import type { BasicAcceptedElems } from './types.js';
import type { CheerioAPI } from './load.js';
import type { Cheerio } from './cheerio.js';
import type { AnyNode, Document } from 'domhandler';
import { textContent } from 'domutils';
import {
type InternalOptions,
type CheerioOptions,
flattenOptions as flattenOptions,
} from './options.js';
import type { ExtractedMap, ExtractMap } from './api/extract.js';
/**
 * Serializes a DOM through a Cheerio instance.
 *
 * @param that - Cheerio instance to render with. A falsy value produces `''`.
 * @param dom - The DOM to render. When `null` or `undefined`, the children of
 *   `that`'s root are rendered instead.
 * @param options - Options for rendering.
 * @returns The rendered document.
 */
function render(
  that: CheerioAPI,
  dom: BasicAcceptedElems<AnyNode> | undefined,
  options: InternalOptions,
): string {
  if (that) {
    const target = dom ?? that._root.children;
    return that(target, null, undefined, options).toString();
  }
  // Without an instance there is nothing to render.
  return '';
}
/**
 * Decides whether the first argument is an options object rather than
 * something to render.
 *
 * A value counts as options only when no separate `options` argument was
 * given and the value is a non-null object with neither a `length` nor a
 * `type` property.
 *
 * @param dom - Object to check if it is an options object.
 * @param options - Options object.
 * @returns Whether the object is an options object.
 */
function isOptions(
  dom?: BasicAcceptedElems<AnyNode> | CheerioOptions | null,
  options?: CheerioOptions,
): dom is CheerioOptions {
  // Explicit options were passed, so `dom` cannot be them.
  if (options) {
    return false;
  }
  if (typeof dom !== 'object' || dom == null) {
    return false;
  }
  // Array-likes carry `length`, DOM nodes carry `type`.
  return !('length' in dom || 'type' in dom);
}
/**
 * Renders the document.
 *
 * @category Static
 * @param options - Options for the renderer.
 * @returns The rendered document.
 */
export function html(this: CheerioAPI, options?: CheerioOptions): string;
/**
 * Renders the document.
 *
 * @category Static
 * @param dom - Element to render.
 * @param options - Options for the renderer.
 * @returns The rendered document.
 */
export function html(
  this: CheerioAPI,
  dom?: BasicAcceptedElems<AnyNode>,
  options?: CheerioOptions,
): string;
export function html(
  this: CheerioAPI,
  dom?: BasicAcceptedElems<AnyNode> | CheerioOptions,
  options?: CheerioOptions,
): string {
  /*
   * `html()` may be called with the options as its only argument. An object
   * without `length` or `type` properties is taken to be those options, in
   * which case there is no explicit element to render.
   */
  let toRender: BasicAcceptedElems<AnyNode> | undefined;
  let rendererOptions = options;
  if (isOptions(dom)) {
    rendererOptions = dom;
  } else {
    toRender = dom;
  }

  /*
   * `$.html()` can be used without a loaded document, so the instance
   * options may be absent; the per-call options are layered on top.
   */
  const merged = {
    ...this?._options,
    ...flattenOptions(rendererOptions),
  };
  return render(this, toRender, merged);
}
/**
 * Render the document as XML.
 *
 * The instance options are reused with `xmlMode` forced to `true`. Like
 * `html`, this tolerates being called without a Cheerio instance as `this`,
 * in which case an empty string is produced.
 *
 * @category Static
 * @param dom - Element to render.
 * @returns The rendered document.
 */
export function xml(
  this: CheerioAPI,
  dom?: BasicAcceptedElems<AnyNode>,
): string {
  // `this` may be missing when the function is detached from its instance.
  const options = { ...this?._options, xmlMode: true };
  return render(this, dom, options);
}
/**
 * Render the document as text.
 *
 * The `textContent` of every passed element is concatenated. The result will
 * include the contents of `<script>` and `<style>` elements. To avoid this, use
 * `.prop('innerText')` instead.
 *
 * @category Static
 * @param elements - Elements to render. Defaults to the root of the instance,
 *   or to nothing when there is no instance.
 * @returns The rendered document.
 */
export function text(
  this: CheerioAPI | void,
  elements?: ArrayLike<AnyNode>,
): string {
  let targets: ArrayLike<AnyNode>;
  if (elements != null) {
    targets = elements;
  } else if (this) {
    targets = this.root();
  } else {
    targets = [];
  }

  let rendered = '';
  let index = 0;
  while (index < targets.length) {
    rendered += textContent(targets[index]);
    index += 1;
  }
  return rendered;
}
/**
 * Parses a string into an array of DOM nodes. The `context` argument has no
 * meaning for Cheerio, but it is maintained for API compatibility with jQuery.
 *
 * @category Static
 * @param data - Markup that will be parsed. Empty or non-string input yields
 *   `null`.
 * @param context - Will be ignored. If it is a boolean it will be used as the
 *   value of `keepScripts`.
 * @param keepScripts - If false all scripts will be removed.
 * @returns The parsed DOM.
 * @alias Cheerio.parseHTML
 * @see {@link https://api.jquery.com/jQuery.parseHTML/}
 */
export function parseHTML(
  this: CheerioAPI,
  data: string,
  context?: unknown | boolean,
  keepScripts?: boolean,
): AnyNode[];
export function parseHTML(this: CheerioAPI, data?: '' | null): null;
export function parseHTML(
  this: CheerioAPI,
  data?: string | null,
  context?: unknown | boolean,
  keepScripts = typeof context === 'boolean' ? context : false,
): AnyNode[] | null {
  if (typeof data !== 'string' || data === '') {
    return null;
  }

  // A boolean in the `context` slot always wins over `keepScripts`.
  const preserveScripts = typeof context === 'boolean' ? context : keepScripts;

  const $fragment = this.load(data, this._options, false);
  if (!preserveScripts) {
    $fragment('script').remove();
  }

  /*
   * Cheerio uses a parent's `children` array to group sibling nodes, and
   * removes nodes from it when they are inserted elsewhere. Returning a
   * shallow copy keeps the result stable across such later insertions.
   */
  return $fragment.root()[0].children.slice();
}
/**
 * Gives access to the top-level root node of the document, wrapped in a
 * Cheerio instance so it can be queried and manipulated.
 *
 * @category Static
 * @example
 *
 * ```js
 * $.root().append('<ul id="vegetables"></ul>').html();
 * //=> <ul id="fruits">...</ul><ul id="vegetables"></ul>
 * ```
 *
 * @returns Cheerio instance wrapping the root node.
 * @alias Cheerio.root
 */
export function root(this: CheerioAPI): Cheerio<Document> {
  const { _root: documentRoot } = this;
  return this(documentRoot);
}
/**
 * Checks to see if the `contained` DOM element is a descendant of the
 * `container` DOM element.
 *
 * @category Static
 * @param container - Potential parent node.
 * @param contained - Potential child node.
 * @returns Indicates if the nodes contain one another.
 * @alias Cheerio.contains
 * @see {@link https://api.jquery.com/jQuery.contains/}
 */
export function contains(container: AnyNode, contained: AnyNode): boolean {
  // Per the jQuery API, a node is never considered to contain itself.
  if (contained === container) return false;

  /*
   * Walk up through the ancestors of `contained`. The walk ends when there is
   * no further parent, or when a node reports itself as its own parent.
   */
  for (
    let current: AnyNode | null = contained;
    current && current !== current.parent;

  ) {
    current = current.parent;
    if (current === container) return true;
  }
  return false;
}
/**
 * Extract multiple values from a document, and store them in an object.
 *
 * The extraction is delegated to the `extract` method of the root selection.
 *
 * @category Static
 * @param map - An object containing key-value pairs. The keys are the names of
 *   the properties to be created on the object, and the values are the
 *   selectors to be used to extract the values.
 * @returns An object containing the extracted values.
 */
export function extract<M extends ExtractMap>(
  this: CheerioAPI,
  map: M,
): ExtractedMap<M> {
  const $root = this.root();
  return $root.extract(map);
}
/** Strips the `readonly` modifier from every property of `T`. */
type Writable<T> = { -readonly [P in keyof T]: T[P] };
/**
 * Appends the entries of `arr2` to `arr1`, updating `arr1.length`.
 *
 * Nothing is changed and `undefined` is returned when either argument is not
 * array-like.
 *
 * @category Static
 * @param arr1 - First array.
 * @param arr2 - Second array.
 * @returns `arr1`, with elements of `arr2` inserted.
 * @alias Cheerio.merge
 * @see {@link https://api.jquery.com/jQuery.merge/}
 */
export function merge<T>(
  arr1: Writable<ArrayLike<T>>,
  arr2: ArrayLike<T>,
): ArrayLike<T> | undefined {
  if (!(isArrayLike(arr1) && isArrayLike(arr2))) {
    return undefined;
  }

  let writeIndex = arr1.length;
  const appended = +arr2.length;
  for (let readIndex = 0; readIndex < appended; readIndex += 1) {
    arr1[writeIndex] = arr2[readIndex];
    writeIndex += 1;
  }

  // Plain array-likes do not track their own length, so set it explicitly.
  arr1.length = writeIndex;
  return arr1;
}
/**
 * Checks if an object is array-like.
 *
 * Real arrays always qualify. Any other value must be a non-null object whose
 * `length` is a non-negative integer and which has an own or inherited
 * property for every index below `length`. `NaN`, fractional and infinite
 * lengths are rejected.
 *
 * @category Static
 * @param item - Item to check.
 * @returns Indicates if the item is array-like.
 */
function isArrayLike(item: unknown): item is ArrayLike<unknown> {
  if (Array.isArray(item)) {
    return true;
  }

  if (typeof item !== 'object' || item === null || !('length' in item)) {
    return false;
  }

  const { length } = item;
  // `Number.isInteger` also rules out `NaN` and `Infinity`, which would
  // otherwise pass a plain `length < 0` check.
  if (typeof length !== 'number' || !Number.isInteger(length) || length < 0) {
    return false;
  }

  // Every index in range must be present; holes disqualify the object.
  for (let i = 0; i < length; i++) {
    if (!(i in item)) {
      return false;
    }
  }
  return true;
}

View File

@@ -0,0 +1,58 @@
/** @file Types used in signatures of Cheerio methods. */
/** The 26 lowercase ASCII letters, as a union of single-character literals. */
type LowercaseLetters =
| 'a'
| 'b'
| 'c'
| 'd'
| 'e'
| 'f'
| 'g'
| 'h'
| 'i'
| 'j'
| 'k'
| 'l'
| 'm'
| 'n'
| 'o'
| 'p'
| 'q'
| 'r'
| 's'
| 't'
| 'u'
| 'v'
| 'w'
| 'x'
| 'y'
| 'z';
/**
* A lowercase letter, its uppercase counterpart, or a string matched by the
* `${number}` template type.
*/
type AlphaNumeric =
| LowercaseLetters
| Uppercase<LowercaseLetters>
| `${number}`;
/** Punctuation characters that a selector may begin with. */
type SelectorSpecial = '.' | '#' | ':' | '|' | '>' | '+' | '~' | '[';
/**
* Type for identifying selectors. Allows us to "upgrade" queries using
* selectors to return `Element`s.
*
* A string matches when it starts with an `AlphaNumeric`, optionally preceded
* by exactly one `SelectorSpecial` character; the rest of the string is
* unconstrained.
*/
export type SelectorType =
| `${SelectorSpecial}${AlphaNumeric}${string}`
| `${AlphaNumeric}${string}`;
import type { Cheerio } from './cheerio.js';
import type { AnyNode } from 'domhandler';
/**
* Elements that can be passed to manipulation methods: an array-like of
* nodes, a single node, or a string.
*/
export type BasicAcceptedElems<T extends AnyNode> = ArrayLike<T> | T | string;
/**
* Elements that can be passed to manipulation methods, including functions.
*
* The function form is called with the element as `this`, and receives an
* index and the element as arguments; it returns the content to use.
*/
export type AcceptedElems<T extends AnyNode> =
| BasicAcceptedElems<T>
| ((this: T, i: number, el: T) => BasicAcceptedElems<T>);
/**
* Function signature, for traversal methods. Receives the element as `this`
* and as the second argument, after an index, and returns a boolean.
*/
export type FilterFunction<T> = (this: T, i: number, el: T) => boolean;
/**
* Supported filter types, for traversal methods: a string, a filter
* function, a single item, or a Cheerio collection.
*/
export type AcceptedFilters<T> = string | FilterFunction<T> | T | Cheerio<T>;

View File

@@ -0,0 +1,32 @@
import { describe, it, expect } from 'vitest';
import * as utils from './utils.js';
describe('util functions', () => {
  it('camelCase function test', () => {
    // [input, expected] pairs; separators are `.`, `_` and `-`.
    const cases: Array<[string, string]> = [
      ['cheerio.js', 'cheerioJs'],
      ['camel-case-', 'camelCase'],
      ['__directory__', '_directory_'],
      ['_one-two.three', 'OneTwoThree'],
    ];
    for (const [input, expected] of cases) {
      expect(utils.camelCase(input)).toBe(expected);
    }
  });

  it('cssCase function test', () => {
    // Every uppercase letter becomes a hyphen followed by its lowercase form.
    const cases: Array<[string, string]> = [
      ['camelCase', 'camel-case'],
      ['jQuery', 'j-query'],
      ['neverSayNever', 'never-say-never'],
      ['CSSCase', '-c-s-s-case'],
    ];
    for (const [input, expected] of cases) {
      expect(utils.cssCase(input)).toBe(expected);
    }
  });

  it('isHtml function test', () => {
    const cases: Array<[string, boolean]> = [
      ['<html>', true],
      ['\n<html>\n', true],
      ['#main', false],
      ['\n<p>foo<p>bar\n', true],
      ['dog<p>fox<p>cat', true],
      ['<p>fox<p>cat', true],
      ['\n<p>fox<p>cat\n', true],
      ['#<p>fox<p>cat#', true],
      ['<!-- comment -->', true],
      ['<!doctype html>', true],
      ['<123>', false],
    ];
    for (const [input, expected] of cases) {
      expect(utils.isHtml(input)).toBe(expected);
    }
  });
});

View File

@@ -0,0 +1,93 @@
import type { AnyNode } from 'domhandler';
import type { Cheerio } from './cheerio.js';
/**
 * Checks if an object is a Cheerio instance.
 *
 * A value is treated as a Cheerio instance when it has a non-nullish
 * `cheerio` property. `null` and `undefined` are safely reported as not
 * being instances.
 *
 * @category Utils
 * @param maybeCheerio - The object to check.
 * @returns Whether the object is a Cheerio instance.
 */
export function isCheerio<T>(maybeCheerio: any): maybeCheerio is Cheerio<T> {
  // Optional chaining keeps `null`/`undefined` inputs from throwing.
  return maybeCheerio?.cheerio != null;
}
/**
 * Convert a string to camel case notation.
 *
 * Each `.`, `_` or `-` is removed and the word character that follows it, if
 * any, is upper-cased. A separator at the very end of the string is dropped.
 *
 * @private
 * @category Utils
 * @param str - The string to be converted.
 * @returns String in camel case notation.
 */
export function camelCase(str: string): string {
  const upperCaseFollower = (_separator: string, follower: string): string =>
    follower.toUpperCase();
  return str.replace(/[._-](\w|$)/g, upperCaseFollower);
}
/**
 * Convert a string from camel case to "CSS case", where word boundaries are
 * described by hyphens ("-") and all characters are lower-case.
 *
 * A hyphen is inserted before every uppercase ASCII letter, including one at
 * the start of the string.
 *
 * @private
 * @category Utils
 * @param str - The string to be converted.
 * @returns String in "CSS case".
 */
export function cssCase(str: string): string {
  const hyphenated = str.replace(/[A-Z]/g, '-$&');
  return hyphenated.toLowerCase();
}
/**
 * Iterate over each DOM element without creating intermediary Cheerio
 * instances.
 *
 * This is intended for use internally to avoid otherwise unnecessary memory
 * pressure introduced by _make.
 *
 * The length is read once up front, so entries added during iteration are not
 * visited.
 *
 * @category Utils
 * @param array - The array to iterate over.
 * @param fn - Function to call.
 * @returns The original instance.
 */
export function domEach<
  T extends AnyNode,
  Arr extends ArrayLike<T> = Cheerio<T>,
>(array: Arr, fn: (elem: T, index: number) => void): Arr {
  const count = array.length;
  let index = 0;
  while (index < count) {
    fn(array[index], index);
    index += 1;
  }
  return array;
}
/** Character codes used to classify the character following a `<`. */
const enum CharacterCodes {
  LowerA = 97,
  LowerZ = 122,
  UpperA = 65,
  UpperZ = 90,
  Exclamation = 33,
}
/**
 * Check if string is HTML.
 *
 * Looks at the first `<` in the string: it must be immediately followed by
 * an ASCII letter or a `!`, and eventually followed by a `>`.
 *
 * @private
 * @category Utils
 * @param str - The string to check.
 * @returns Indicates if `str` is HTML.
 */
export function isHtml(str: string): boolean {
  const openIndex = str.indexOf('<');

  // There must be room for `<`, one more character, and a closing `>`.
  if (openIndex === -1 || openIndex + 3 > str.length) {
    return false;
  }

  const code = str.charCodeAt(openIndex + 1);
  const isLowerLetter =
    code >= CharacterCodes.LowerA && code <= CharacterCodes.LowerZ;
  const isUpperLetter =
    code >= CharacterCodes.UpperA && code <= CharacterCodes.UpperZ;
  const isBang = code === CharacterCodes.Exclamation;
  if (!(isLowerLetter || isUpperLetter || isBang)) {
    return false;
  }

  return str.indexOf('>', openIndex + 2) !== -1;
}