butiran

scraping with cheerio

website

url https://dudung.github.io/web2scrap/elem-class-attr


<!DOCTYPE html>
<html>

  <head>
  <title>Title of the document</title>
  </head>

  <body>
  
    <!-- Outer container; matched by the ".abcd" selector. -->
    <div class="abcd">
      <!-- First ".efgh" group: the only one with <span> children (".efgh > span"). -->
      <div class="efgh">
        <span class="iiii">Apple</span>
        <span>Cat</span>
      </div>

      <!-- Second ".efgh" group: a link identified by its jsname attribute ("a[jsname='jjjj']"). -->
      <div class="efgh">
        <a jsname="jjjj" href="https://www.google.com/">Google</a>
      </div>

      <!-- Third ".efgh" group: two <cite> elements; only the first carries a city attribute. -->
      <div class="efgh">
        <cite city="serang">Surabaya</cite>
        <cite>Malang</cite>
      </div>
    </div>
  
  </body>

</html>

js code

url https://github.com/Darshan972/GoogleScrapingBlogs/blob/main/GoogleorganicResultsScraper.js (original)

const unirest = require("unirest");
const cheerio = require("cheerio");

/**
 * Gathers one value per element matched by `selector`.
 *
 * @param {Function} $ - Root function returned by `cheerio.load`.
 * @param {string} selector - CSS selector to run against the loaded document.
 * @param {Function} pick - Receives the wrapped element (`$(el)`) and returns
 *   the value to keep for it.
 * @returns {Array} The picked values, in the order `.each` visits the matches.
 *   An `undefined` returned by `pick` is stored as-is, not skipped.
 */
const collectMatches = ($, selector, pick) => {
  const values = [];
  $(selector).each((i, el) => {
    values.push(pick($(el)));
  });
  return values;
};

/**
 * Fetches the sample page, prints its raw HTML, then prints the result of each
 * selector example (every result array is followed by an empty line).
 *
 * @returns {*} Whatever the request's `.then(...)` returns; the callback
 *   itself returns nothing, it only logs.
 */
const getOrganicData = () => {
  return unirest
    // https, matching the URL given for the sample website in these notes.
    .get("https://dudung.github.io/web2scrap/elem-class-attr")
    .headers({
      "User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36",
    })
    .then((response) => {
      // Display content of HTML file
      console.log(response.body);

      const $ = cheerio.load(response.body);

      // Extractors applied to each wrapped element.
      const text = (node) => node.text();
      const attr = (name) => (node) => node.attr(name);

      // [selector, extractor] pairs, in the order they are reported.
      const queries = [
        ["body", text],
        [".abcd", text],
        [".efgh > span", text],
        ["cite", attr("city")],
        ["cite", text],
        ["cite[city='serang']", text],
        ["a[jsname='jjjj']", text],
        ["a[jsname='jjjj']", attr("href")],
      ];

      // Query and print one at a time so output appears in the listed order.
      for (const [selector, pick] of queries) {
        console.log(collectMatches($, selector, pick));
        console.log();
      }
    });
};

// Start the scrape. Any rejection of the returned promise is printed and
// flagged through the exit code instead of being left unhandled.
getOrganicData().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});

html file

elem body to text

class abcd to text

class efgh, elem span to text

elem cite, attr city

// Read the "city" attribute from every <cite> element; the result of
// .attr("city") is stored for each match, whatever it is.
const kota_attr = [];
$("cite").each((index, node) => {
  const city = $(node).attr("city");
  kota_attr.push(city);
});
console.log(kota_attr);
console.log();

elem cite to text

// Collect the text content of every <cite> element.
const kota_cont = [];
$("cite").each((index, node) => {
  const label = $(node).text();
  kota_cont.push(label);
});
console.log(kota_cont);
console.log();

elem cite with attr city='serang' to text

elem a with attr jsname='jjjj' to text

// Collect the link text of every <a> whose jsname attribute equals "jjjj".
const a1 = [];
$("a[jsname='jjjj']").each((index, node) => {
  const linkText = $(node).text();
  a1.push(linkText);
});
console.log(a1);
console.log();

elem a with attr jsname='jjjj', attr href