website Link to heading

url https://dudung.github.io/web2scrap/elem-class-attr


<!DOCTYPE html>
<html>

  <head>
  <title>Title of the document</title>
  </head>

  <body>
  
    <div class="abcd">
      <div class="efgh">
        <span class="iiii">Apple</span>
        <span>Cat</span>
      </div>

      <div class="efgh">
        <a jsname="jjjj" href="https://www.google.com/">Google</a>
      </div>

      <div class="efgh">
        <cite city="serang">Surabaya</cite>
        <cite>Malang</cite>
      </div>
    </div>
  
  </body>

</html>

js code Link to heading

url https://github.com/Darshan972/GoogleScrapingBlogs/blob/main/GoogleorganicResultsScraper.js (original)

const unirest = require("unirest");
const cheerio = require("cheerio");

/**
 * Fetches the demo page and prints what cheerio extracts from it for a
 * series of CSS selectors.
 *
 * Output order: the raw response body first, then one array per selector,
 * each followed by a blank line.
 *
 * @returns {Promise<void>} Whatever unirest's `.then()` yields (expected to be
 *   a promise); it settles after everything has been printed. Failures are not
 *   handled here, so the caller is responsible for them.
 */
const getOrganicData = () => {
  return unirest
    // https matches the documented location of the fixture page.
    .get("https://dudung.github.io/web2scrap/elem-class-attr")
    .headers({
      "User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36",
    })
    .then((response) => {
      // Display content of HTML file
      console.log(response.body);

      const $ = cheerio.load(response.body);

      // toArray() + Array.prototype.map is used on purpose: cheerio's own
      // .map() discards undefined results, which would hide elements that
      // lack the requested attribute.

      /** Text content of every element matching `selector`, in document order. */
      const textOf = (selector) =>
        $(selector)
          .toArray()
          .map((el) => $(el).text());

      /** Value of attribute `name` (or undefined) for every element matching `selector`. */
      const attrOf = (selector, name) =>
        $(selector)
          .toArray()
          .map((el) => $(el).attr(name));

      /** Prints one result array followed by an empty separator line. */
      const show = (values) => {
        console.log(values);
        console.log();
      };

      // elem body to text
      const body = textOf("body");
      show(body);

      // class abcd to text
      const abcd = textOf(".abcd");
      show(abcd);

      // class efgh, direct child elem span to text
      const efgh = textOf(".efgh > span");
      show(efgh);

      // elem cite, attr city (undefined where the attribute is absent)
      const kota_attr = attrOf("cite", "city");
      show(kota_attr);

      // elem cite to text
      const kota_cont = textOf("cite");
      show(kota_cont);

      // elem cite filtered by attr city='serang', to text
      const kota_attr_cont = textOf("cite[city='serang']");
      show(kota_attr_cont);

      // elem a filtered by attr jsname='jjjj', to text
      const a1 = textOf("a[jsname='jjjj']");
      show(a1);

      // elem a filtered by attr jsname='jjjj', attr href
      const a2 = attrOf("a[jsname='jjjj']", "href");
      show(a2);
    });
};

// Run the demo. A failed request or parse is reported here instead of being
// left as an unhandled promise rejection.
getOrganicData().catch((error) => {
  console.error("Scraping failed:", error);
  process.exitCode = 1;
});

html file Link to heading

  • Result
    <!DOCTYPE html>
    <html>
    
      <head>
      <title>Title of the document</title>
      </head>
    
      <body>
    
        <div class="abcd">
          <div class="efgh">
            <span class="iiii">Apple</span>
            <span>Cat</span>
          </div>
    
          <div class="efgh">
            <a jsname="jjjj" href="https://www.google.com/">Google</a>
          </div>
    
          <div class="efgh">
            <cite city="serang">Surabaya</cite>
            <cite>Malang</cite>
          </div>
        </div>
    
      </body>
    
    </html>
    
  • Snippet
    console.log(response.body);
    

elem body to text Link to heading

  • Result
    [
      '\n' +
        '  \n' +
        '    \n' +
        '      \n' +
        '        Apple\n' +
        '        Cat\n' +
        '      \n' +
        '\n' +
        '      \n' +
        '        Google\n' +
        '      \n' +
        '\n' +
        '      \n' +
        '        Surabaya\n' +
        '        Malang\n' +
        '      \n' +
        '    \n' +
        '  \n' +
        '  \n' +
        '\n' +
        '\n'
    ]
    
  • Snippet
    let body = [];
    $("body").each((i, el) => {
      body[i] = $(el).text();
    });
    console.log(body);
    console.log();
    

class abcd to text Link to heading

  • Result
    [
      '\n' +
        '      \n' +
        '        Apple\n' +
        '        Cat\n' +
        '      \n' +
        '\n' +
        '      \n' +
        '        Google\n' +
        '      \n' +
        '\n' +
        '      \n' +
        '        Surabaya\n' +
        '        Malang\n' +
        '      \n' +
        '    '
    ]
    
  • Code
    let abcd = [];
    $(".abcd").each((i, el) => {
      abcd[i] = $(el).text();
    });
    console.log(abcd);
    console.log();
    

class efgh, elem span to text Link to heading

  • Result
    [ 'Apple', 'Cat' ]
    
  • Code
    let efgh = [];
    $(".efgh > span").each((i, el) => {
      efgh[i] = $(el).text();
    });
    console.log(efgh);
    console.log();
    

elem cite, attr city Link to heading

  • Result
    [ 'serang', undefined ]
    
  • Code
let kota_attr = [];
$("cite").each((i, el) => {
  kota_attr[i] = $(el).attr("city");
});
console.log(kota_attr);
console.log();

elem cite to text Link to heading

  • Result
    [ 'Surabaya', 'Malang' ]
    
  • Code
let kota_cont = [];
$("cite").each((i, el) => {
  kota_cont[i] = $(el).text();
});
console.log(kota_cont);
console.log();

elem cite with attr city to text Link to heading

  • Result
    [ 'Surabaya' ]
    
  • Code
    let kota_attr_cont = [];
    $("cite[city='serang']").each((i, el) => {
      kota_attr_cont[i] = $(el).text();
    });
    console.log(kota_attr_cont);
    console.log();
    

elem a with attr jsname to text Link to heading

  • Result
    [ 'Google' ]
    
  • Code
let a1 = [];
$("a[jsname='jjjj']").each((i, el) => {
  a1[i] = $(el).text();
});
console.log(a1);
console.log();

elem a with attr jsname, attr href Link to heading

  • Result
    [ 'https://www.google.com/' ]
    
  • Code
    let a2 = [];
    $("a[jsname='jjjj']").each((i, el) => {
      a2[i] = $(el).attr("href");
    });
    console.log(a2);
    console.log();