website Link to heading
url https://dudung.github.io/web2scrap/elem-class-attr
<!DOCTYPE html>
<html>
<head>
<title>Title of the document</title>
</head>
<body>
<div class="abcd">
<div class="efgh">
<span class="iiii">Apple</span>
<span>Cat</span>
</div>
<div class="efgh">
<a jsname="jjjj" href="https://www.google.com/">Google</a>
</div>
<div class="efgh">
<cite city="serang">Surabaya</cite>
<cite>Malang</cite>
</div>
</div>
</body>
</html>
js code Link to heading
url https://github.com/Darshan972/GoogleScrapingBlogs/blob/main/GoogleorganicResultsScraper.js (original)
const unirest = require("unirest");
const cheerio = require("cheerio");
/**
 * Fetches the sample page and prints what each cheerio selector/accessor
 * extracts from it.
 *
 * Printed in order (every array is followed by a blank line): the raw HTML,
 * the text of <body>, the text of .abcd, the text of each ".efgh > span",
 * the "city" attribute of each <cite>, the text of each <cite>, the text of
 * cite[city='serang'], the text of a[jsname='jjjj'], and the href of
 * a[jsname='jjjj'].
 *
 * @returns {Promise<void>} Settles after everything has been printed;
 *   rejects when the request fails or the page cannot be processed.
 */
const getOrganicData = () => {
  return unirest
    // Use the https URL directly: it is the documented address of the page
    // and avoids an extra plaintext request that only gets redirected.
    .get("https://dudung.github.io/web2scrap/elem-class-attr")
    .headers({
      "User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36",
    })
    .then((response) => {
      // unirest reports transport failures and 4xx/5xx statuses through
      // response.error (an Error, otherwise false) instead of rejecting, so
      // check it before trying to parse a body that may be missing.
      if (response.error) {
        throw new Error(`Request for sample page failed: ${response.error}`);
      }

      // Display content of HTML file
      console.log(response.body);

      let $ = cheerio.load(response.body);

      // Text of the whole <body>, whitespace between tags included.
      let body = [];
      $("body").each((i, el) => {
        body[i] = $(el).text();
      });
      console.log(body);
      console.log();

      // Text of every element with class "abcd".
      let abcd = [];
      $(".abcd").each((i, el) => {
        abcd[i] = $(el).text();
      });
      console.log(abcd);
      console.log();

      // Text of each <span> that is a direct child of a ".efgh" element.
      let efgh = [];
      $(".efgh > span").each((i, el) => {
        efgh[i] = $(el).text();
      });
      console.log(efgh);
      console.log();

      // "city" attribute of each <cite>; undefined where the attribute is absent.
      let kota_attr = [];
      $("cite").each((i, el) => {
        kota_attr[i] = $(el).attr("city");
      });
      console.log(kota_attr);
      console.log();

      // Text of each <cite>.
      let kota_cont = [];
      $("cite").each((i, el) => {
        kota_cont[i] = $(el).text();
      });
      console.log(kota_cont);
      console.log();

      // Text of only those <cite> elements whose "city" attribute is "serang".
      let kota_attr_cont = [];
      $("cite[city='serang']").each((i, el) => {
        kota_attr_cont[i] = $(el).text();
      });
      console.log(kota_attr_cont);
      console.log();

      // Text of each <a> whose "jsname" attribute is "jjjj".
      let a1 = [];
      $("a[jsname='jjjj']").each((i, el) => {
        a1[i] = $(el).text();
      });
      console.log(a1);
      console.log();

      // "href" attribute of each <a> whose "jsname" attribute is "jjjj".
      let a2 = [];
      $("a[jsname='jjjj']").each((i, el) => {
        a2[i] = $(el).attr("href");
      });
      console.log(a2);
      console.log();
    });
};

// Do not leave the returned promise floating: report the failure and signal
// it through the exit code instead of an unhandled rejection.
getOrganicData().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
html file Link to heading
- Result
<!DOCTYPE html> <html> <head> <title>Title of the document</title> </head> <body> <div class="abcd"> <div class="efgh"> <span class="iiii">Apple</span> <span>Cat</span> </div> <div class="efgh"> <a jsname="jjjj" href="https://www.google.com/">Google</a> </div> <div class="efgh"> <cite city="serang">Surabaya</cite> <cite>Malang</cite> </div> </div> </body> </html>
- Code
// Print the response body, i.e. the HTML text of the fetched page.
console.log(response.body);
elem body to text Link to heading
- Result
[ '\n' + ' \n' + ' \n' + ' \n' + ' Apple\n' + ' Cat\n' + ' \n' + '\n' + ' \n' + ' Google\n' + ' \n' + '\n' + ' \n' + ' Surabaya\n' + ' Malang\n' + ' \n' + ' \n' + ' \n' + ' \n' + '\n' + '\n' ]
- Code
// Collect the text of each <body> element (whitespace between tags included), then print the array and a blank line.
let body = []; $("body").each((i, el) => { body[i] = $(el).text(); }); console.log(body); console.log();
class abcd to text Link to heading
- Result
[ '\n' + ' \n' + ' Apple\n' + ' Cat\n' + ' \n' + '\n' + ' \n' + ' Google\n' + ' \n' + '\n' + ' \n' + ' Surabaya\n' + ' Malang\n' + ' \n' + ' ' ]
- Code
// Collect the text of each element with class "abcd" (all descendant text is included), then print the array and a blank line.
let abcd = []; $(".abcd").each((i, el) => { abcd[i] = $(el).text(); }); console.log(abcd); console.log();
class efgh, elem span to text Link to heading
- Result
[ 'Apple', 'Cat' ]
- Code
// Collect the text of each <span> that is a direct child of an element with class "efgh", then print the array and a blank line.
let efgh = []; $(".efgh > span").each((i, el) => { efgh[i] = $(el).text(); }); console.log(efgh); console.log();
elem cite, attr city Link to heading
- Result
[ 'serang', undefined ]
- Code
// Collect the "city" attribute of each <cite> element; a <cite> without
// that attribute contributes undefined at its index.
let kota_attr = [];
$("cite").each((i, el) => {
kota_attr[i] = $(el).attr("city");
});
console.log(kota_attr);
console.log();
elem cite to text Link to heading
- Result
[ 'Surabaya', 'Malang' ]
- Code
// Collect the text of each <cite> element, whether or not it has attributes.
let kota_cont = [];
$("cite").each((i, el) => {
kota_cont[i] = $(el).text();
});
console.log(kota_cont);
console.log();
elem cite with attr city='serang' to text Link to heading
- Result
[ 'Surabaya' ]
- Code
// Collect the text of only those <cite> elements whose "city" attribute equals "serang", then print the array and a blank line.
let kota_attr_cont = []; $("cite[city='serang']").each((i, el) => { kota_attr_cont[i] = $(el).text(); }); console.log(kota_attr_cont); console.log();
elem a with attr jsname='jjjj' to text Link to heading
- Result
[ 'Google' ]
- Code
// Collect the text of each <a> element whose "jsname" attribute equals "jjjj".
let a1 = [];
$("a[jsname='jjjj']").each((i, el) => {
a1[i] = $(el).text();
});
console.log(a1);
console.log();
elem a with attr jsname='jjjj', attr href Link to heading
- Result
[ 'https://www.google.com/' ]
- Code
// Collect the "href" attribute of each <a> element whose "jsname" attribute equals "jjjj", then print the array and a blank line.
let a2 = []; $("a[jsname='jjjj']").each((i, el) => { a2[i] = $(el).attr("href"); }); console.log(a2); console.log();