<template>
  <section class="site-crawler">
    <!-- Crawler UI: a heading, the crawl controls (given the previousCrawls prop), and the results view. -->
    <h1>Site Crawler</h1>
    <CrawlControls :previous-crawls="previousCrawls" />
    <CrawlResults />
  </section>
</template>

<script>
import axios from "axios";
import cheerio from "cheerio";

import CrawlResults from "./CrawlResults.vue";
import CrawlControls from "./CrawlControls.vue";
import qs from "qs";

export default {
  components: {CrawlResults, CrawlControls},
  props:      {
    previousCrawls: {
      type:    Array,
      default: []
    },
  },

  data() {
    return {
      previousCrawlDataLoaded: false,
      crawlPaused:             false,
      elapsedTime:             0,
      show200Dialog:           false,
      isBusy:                  true,
      crawlSite:               false,
      busyMakingThreads:       false,
      oneTimeElementsCrawled:  false,
      maxThreads:              5,
      threadProcesses:         {},
      location:                "https://" + window.location.hostname,
      maxPages:                5000,
      sitemapOptions:          [
        '/sitemap_index.xml'
      ],
      results:                 {
        requestedURLs: [],
        status200:     [],
        status300:     [],
        status400:     [],
        status500:     [],
        statusUnknown: [],
        noResponse:    [],
        requestError:  [],
        gravityForms:  {
          urls: [],
          data: []
        },
        images:        {
          urls: [],
          data: []
        },
      },
      progressStats:           {
        elapsedTime:      0,
        totalRequests:    0,
        totalResponses:   0,
        URLsToVisitCount: 0,
        URLsToVisitList:  []
      },
    };
  },
  async mounted() {
    // console.log('SiteCrawler component mounted');
    console.log(this.previousCrawls);
    this.progressStats.URLsToVisitList.push(this.origin);
    await this.fetchSiteMaps();
    this.isBusy = false;
    console.log(this.progressStats.URLsToVisitList);
  },
  computed: {
    origin() {
      return !this.location.endsWith("/") ? this.location + "/" : this.location;
    },
    isCrawling() {
      return this.crawlSite && Object.keys(this.threadProcesses).length;
    },
    displayFinalResults() {
      return (this.crawlSite && !Object.keys(this.threadProcesses).length && !this.crawlPaused) || this.previousCrawlDataLoaded;
    },
  },
  methods:  {
    hash() {
      return (Math.random() + 1).toString(36).substring(7)
    },
    timer() {
      if (this.isCrawling && !this.crawlPaused) {
        setTimeout(() => {
          this.elapsedTime++
          this.progressStats.elapsedTime = this.elapsedTime / 10;
          this.timer();
        }, 100)
      }
    },
    async fetchSiteMaps() {
      console.log('Attempting to fetch sitemaps...');
      for (let i = 0; i < this.sitemapOptions.length; i++) {
        console.log('fetching: ' + this.sitemapOptions[i]);
        await axios.get(this.origin + this.sitemapOptions[i])
            .then(response => {
              console.log('found: ' + this.sitemapOptions[i]);
              if (response.status === 200) {
                let linkURL = this.origin + this.sitemapOptions[i];
                this.processURL(linkURL);
              }
            }).catch(function (error) {
              // console.log(error);
              const statusCode = error.response.status.toString();
              if (statusCode.startsWith('4')) {
                console.log('failed: ' + error.config.url);
              } else {
                console.log('unknown error fetching: ' + error.config.url)
              }
            });
      }
      console.log('---');
    },
    processURL(href) {
      let url = href.toLowerCase();

      if (
          !url.endsWith('.pdf') &&
          !url.endsWith('.jpg') &&
          !url.endsWith('.jpeg') &&
          !url.endsWith('.png') &&
          !url.endsWith('.txt') &&
          !url.endsWith('.doc') &&
          !url.endsWith('.docx') &&
          !url.endsWith('.php') &&
          !url.endsWith('.css') &&
          !url.endsWith('.js')
      ) {
        // Add a trailing slash if it is missing
        url = !url.endsWith("/") ? url + "/" : url;

        // only proceed if there are no query params
        if (!url.includes("?") && !url.includes("#")) {

          // check if the linkURL stars with the protocol and domain of the origin
          if (url.startsWith(this.origin)) {

            // if we haven't already requested the URL, and it isn't already in our queue
            if (!this.results.requestedURLs.includes(url) && !this.progressStats.URLsToVisitList.includes(url)) {
              // console.log(url);
              this.progressStats.URLsToVisitList.push(url);
              this.progressStats.URLsToVisitCount = this.progressStats.URLsToVisitList.length;
            }
          }
          // If it does not, determine if it is a relative URL for the current domain
          else if (url.startsWith('/') && !url.startsWith('/cdn')) {
            if (url.startsWith('/../')) {
              url = url.replace('/../', '/');
            }
            let absoluteURL = this.origin + url;
            if (!this.results.requestedURLs.includes(absoluteURL) && !this.progressStats.URLsToVisitList.includes(absoluteURL)) {
              this.progressStats.URLsToVisitList.push(absoluteURL);
              this.progressStats.URLsToVisitCount = this.progressStats.URLsToVisitList.length;
            }
          }
        }
      }
    },
    crawlPage(url, hash) {
      console.log('Processing: ' + url + ". Thread: " + hash);
      if (this.crawlSite) {

        // make sure the url has not been requested do to asynchronous activity.
        if (!this.results.requestedURLs.includes(url)) {
          // console.log("processURL(" + url + ", " + hash + ")");

          // increase the count of pages requested in our results object
          this.progressStats.totalRequests++;
          this.results.requestedURLs.push(url);

          // requesting the current url
          axios.get(url)
              .then(response => {
                    if (response.status === 200) {

                      // increase the results count
                      this.progressStats.totalResponses++;

                      // initializing cheerio on the currentURLs pageHTML
                      let pageHTML = response.data;
                      const $ = cheerio.load(pageHTML);

                      // get the size of the page
                      const pagebytes = $('html').html().length;
                      const kbytes = Math.round((pagebytes / 1024) * 100) / 100;

                      // Setup an images array to collect image data as DOM is processed
                      let images = [];

                      /**
                       * get an object containing all the links within pageHTML
                       * only crawl the header and footer if not already crawled.
                       **/
                      let elements;
                      if (!this.oneTimeElementsCrawled) {
                        elements = $('*:not(script):not(style)');
                        this.oneTimeElementsCrawled = true;
                      } else {
                        elements = $('*:not(script):not(style):not(.et-l--header a):not(.et-l--footer a):not(.df-post-categories-wrap a):not(.df-post-title a):not(.df-post-image-wrap a)');
                      }
                      // console.log(pageLinks.length + " page-links found on " + url);

                      // iterate over each link
                      elements.each((index, element) => {

                        // Find any href attributes
                        let hrefAttr = $(element).attr("href");
                        if (hrefAttr && !hrefAttr.includes('/wp-json/')) {
                          // ensure we want to crawl the discovered URL
                          this.processURL(hrefAttr);
                        }

                        // Find any src attributes
                        let srcAttr = $(element).attr('src');
                        if (srcAttr) {
                          images.push(srcAttr);
                          if (!this.results.images.urls.includes(srcAttr)) {
                            this.results.images.urls.push(srcAttr);
                            this.results.images.data.push({url: srcAttr});
                          }
                        }

                        // Find elements with background-image css property set
                        let bgImageUrl = $(element).css('background-image');
                        if (bgImageUrl) {
                          bgImageUrl = bgImageUrl.replace("'", "");
                          bgImageUrl = bgImageUrl.substring(4, bgImageUrl.length - 1)
                          // console.log(bgImage);
                          images.push(bgImageUrl);

                          if (!this.results.images.urls.includes(bgImageUrl)) {
                            this.results.images.urls.push(bgImageUrl);
                            this.results.images.data.push({url: bgImageUrl});
                          }
                        }

                        // find gravity forms
                        if ($(element).hasClass('gform_wrapper')) {
                          let form = $(element).find("form");
                          let formId = form.attr("data-formid");
                          this.results.gravityForms.urls.push(url);
                          this.results.gravityForms.data.push({
                            url: url,
                            formId: formId
                          });
                          console.log("Gravity Form found on " + url);
                        }
                      });

                      // console.log(this.results.images);

                      // push the current URL to the results object
                      this.results.status200.push(
                          {
                            url:         url,
                            size:        kbytes,
                            images:      images,
                            imagesCount: images.length
                          }
                      );


                      // Create threads for remaining URLsToVisitList
                      if (this.progressStats.URLsToVisitList.length && Object.keys(this.threadProcesses).length <= this.maxThreads) {
                        this.busyMakingThreads = true;
                        while (this.progressStats.URLsToVisitList.length && Object.keys(this.threadProcesses).length <= this.maxThreads) {
                          let hash = this.hash();
                          let url = this.progressStats.URLsToVisitList.pop();
                          this.progressStats.URLsToVisitCount = this.progressStats.URLsToVisitList.length;

                          this.threadProcesses[hash] = {
                            url:  url,
                            hash: hash,
                          }
                          this.crawlPage(url, hash);
                        }
                        this.busyMakingThreads = false;
                      }

                      // Delete the threadProcess
                      delete this.threadProcesses[hash];

                      // If this is the final thread, save the results
                      if (this.displayFinalResults) {
                        this.saveResults();
                      }

                    } else {
                      console.log('axios response, but not 200?')
                      this.progressStats.totalResponses++;
                      delete this.threadProcesses[hash];
                    }
                  }
              )
              .catch(error => {
                if (error.response) {
                  // The request was made and the server responded
                  // with a status code that falls out of the range of 2xx
                  const statusCode = error.response.status.toString();
                  if (statusCode.startsWith('3')) {
                    this.results.status300.push({status: statusCode, url: url, hash: hash, size: 0});
                    console.log("300 Response: " + url);
                  } else if (statusCode.startsWith('4')) {
                    this.results.status400.push({status: statusCode, url: url, hash: hash, size: 0});
                    console.log("400 Response: " + url);
                  } else if (statusCode.startsWith('5')) {
                    this.results.status500.push({status: statusCode, url: url, hash: hash, size: 0});
                    console.log("500 Response: " + url);
                  } else {
                    this.results.statusUnknown.push({status: statusCode, url: url, hash: hash, size: 0})
                    console.log("Status Unknown Response: " + url);
                  }
                } else if (error.request) {
                  this.results.noResponse.push({
                    status: 'No Response', url: url, hash: hash, size: 0, error: error.request
                  });
                  console.log(url + ': Request was made but no response was received');
                  console.log(error.request);
                } else {

                  this.results.requestError.push({
                    status: 'requestError', url: url, hash: hash, size: 0, error: error.message
                  });
                  console.log(url + ': There was an error setting up the request');
                  console.log('Error: ', error.message);
                }

                this.progressStats.totalResponses++;

                // Delete the threadProcess
                delete this.threadProcesses[hash];

                if (this.displayFinalResults) {
                  this.saveResults();
                }
              });
        } else {
          // Delete the threadProcess
          delete this.threadProcesses[hash];
        }
      }
    },
    startCrawl() {
      let hash = this.hash();
      this.threadProcesses[hash] = {
        url:  this.origin,
        hash: hash,
      }

      this.crawlSite = true;
      this.crawlPaused = false;
      this.crawlPage(this.origin, hash);
      this.timer();
    },
    toggleCrawl() {
      this.crawlSite = !this.crawlSite;
      this.crawlPaused = !this.crawlPaused;

      if (this.crawlSite && !this.crawlPaused) {
        console.log(this.threadProcesses);
        this.timer();
        for (const [hash, value] of Object.entries(this.threadProcesses)) {
          this.crawlPage(value.url, hash);
        }
      }
    },
    async resetCrawl() {
      this.isBusy = true;
      if (Object.keys(this.threadProcesses).length) {
        setTimeout(() => {
          this.resetCrawl();
        }, 100)
      } else {
        this.previousCrawlDataLoaded = false;
        this.previousCrawl = false;

        this.crawlPaused = false;
        this.elapsedTime = 0;
        this.show200Dialog = false;

        this.crawlSite = false;
        this.busyMakingThreads = false;
        this.oneTimeElementsCrawled = false;
        this.threadProcesses = {};
        this.results = {
          requestedURLs: [],
          status200:     [],
          status300:     [],
          status400:     [],
          status500:     [],
          statusUnknown: [],
          noResponse:    [],
          requestError:  [],
          gravityForms:  [],
          images:        {
            urls: [],
            data: []
          },
        };
        this.progressStats = {
          elapsedTime:      0,
          totalRequests:    0,
          totalResponses:   0,
          URLsToVisitCount: 0,
          URLsToVisitList:  []
        }

        this.progressStats.URLsToVisitList.push(this.origin);
        await this.fetchSiteMaps();
        this.isBusy = false;
      }
    },
    saveResults() {
      console.log('Saving Results...');
      let data = {
        action: "save_crawl_data_to_file",
        data:   this.results,
      };
      axios
          .post(
              window.lhp_vc_data.ajax_url,
              qs.stringify(data)
          )
          .then((response) => {
            console.log("Crawl Data Saved to" + response.data.file);
            console.log(response.data.results);
          })
          .catch((error) => {
            console.log(error)
          });
    },

  }
}
</script>