# -*- shell-script -*-
# gfunc83120-extract-information.sh -- Determine where our videos are located in, and whether their addresses can be parsed with free software
# Copyright © 2015-2016 Michael Pagan
#
# Author: Michael Pagan
# E-Mail: michael.pagan@member.fsf.org
# Jabber: pegzmasta@member.fsf.org
#
# This file is part of Genshiken.
#
# Genshiken is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# Genshiken is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Genshiken. If not, see http://www.gnu.org/licenses/.
#===================================================================
#######################################
# Extract_Information PAGE
#
# Scrape a saved episode web page for its embedded third-party video players,
# classify every host, report the findings to the user and pick the host that
# will be used for the download.
#
# Classification (derived from the 'postTabs_titles' lines of PAGE):
#   o  Ethical     -- line mentions 'file' and not 'application'
#   o  Unethical   -- line mentions 'application'
#   o  Unreadable  -- line mentions neither
# Unreadable/unethical hosts are promoted when an alternative is found
# (Dailymotion/Veoh look-ups through curl, or a successful run of $extract).
#
# Arguments:
#   $1 - path of the HTML file to inspect (read with grep/sed; used unquoted,
#        so it must not contain whitespace or glob characters)
#
# Globals read:
#   HOST_LOCATE, BROWSER_READABLE, GET_DOMAIN -- command strings run via `eval'
#   extract, extractors -- commands used to test whether a link can be parsed
#   GNU                 -- first index of the counting loops
#   WAIT                -- when set and <= 1, the pauses between messages are skipped
#   PROGNAME, USER      -- used in the messages
#   N R G B M C         -- interpolated into every message (presumably colour
#                          escape sequences -- defined elsewhere)
#
# Globals written:
#   selected_domain     -- domain of the first ethical host (declare -g)
#   python_domain       -- domain of the first `youtube-dl' parsable host, set
#                          only when no ethical host is left (declare -g)
#   NOTE: domain, host, player, video_link, video_id, player_number,
#         supposedly_ethical_link and the loop counters i/j are NOT local and
#         therefore leak into the caller's scope.
#
# Files:
#   ./domain.log     -- scratch file, removed before returning
#   ./<domain>.html  -- written by `wget -O' while probing ethical hosts
#
# Outputs: progress messages on stdout.
# Returns: the status of the final `[[ -f domain.log ]] && rm domain.log',
#          i.e. non-zero when no domain.log exists at that point.
#######################################
function Extract_Information
{
  # Let's get some <<extra-information>> from the episode website, like: =\
  #                                                                       |
  #  o  Which sites are hosting content online for this episode           |
  #  o  Which use proprietary software (i.e. launching a non-free app)    |
  #  o  Which are providing a valid video path for your browser (ethical) |
  #                                                                       |
  # Video_Players =  Player_#         : Host_Name                         |
  # Host_Domains  =  Host_Name/player : Domain_Name                       |
  # Selected_Host =  1st-ethical-host ====================================/
  # NOTE: every list is newline separated and has its blanks replaced by underscores
  local Video_Players=$(   grep 'postTabsLinks'   $1 | sed -e 's|.*postTabs_li_\(.\)_.*([^A-Za-z]\(.*\)[^A-Za-z]);[ ].*|Player \1:\2|'              -e 's/ /_/g')
  local Host_Domains=$(    grep 'postTabs_titles' $1 | sed -e 's_.*<b>\(.*\)</b>.*src=.http://\(.*\)\.\(.*\)\.\(.*\)_\1:\2.\3.\4/_' -e 's_/.*__'    -e 's/ /_/g')
  local Ethical_Hosts=$(   grep 'postTabs_titles' $1 | grep     'file' | grep -v 'application' | gawk -F'<b>' '{ print $2 }' |  sed -e 's_</b>.*__' -e 's/ /_/g')
  local Unethical_Hosts=$( grep 'postTabs_titles' $1 | grep                      'application' | gawk -F'<b>' '{ print $2 }' |  sed -e 's_</b>.*__' -e 's/ /_/g')
  local Unreadable_Hosts=$(grep 'postTabs_titles' $1 | grep -E -v 'file|application' | gawk -F'<b>' '{ print $2 }'           |  sed -e 's_</b>.*__' -e 's/ /_/g')

  # Let's remove duplicate players/domains, for they interfere with my REGEX
  # NOTE(review): `wc -l' yields a single word, so this loop body runs at most once and
  #+ $clones is never advanced; only duplicates of the first player can be dropped.
  # NOTE(review): the sed address `2,+' carries no line count -- confirm the intended
  #+ range (probably `2,$').
  for i in `wc -l <<< "$Video_Players"`
  do
    local total_players=${total_players:=$i}
    local clones=${clones:=1}
    [[ $clones -ge $total_players ]] && break
    [[ `grep "$(sed -n "$clones p" <<< "$Video_Players" | gawk -F: '{ print $2 }')" <<< "$Video_Players" | wc -l` -ge 2 ]] &&
    {
      local duplicates=$(sed -n '2,+ p' <<< "$(grep "$(sed -n "$clones p" <<< "$Video_Players" | gawk -F: '{ print $2 }')" <<< "$Video_Players")")
      Video_Players=$(grep -v "$duplicates" <<< "$Video_Players")
      total_players=$(expr $total_players '-' $(wc -l <<< "$duplicates"))
    }
  done
  # Remove duplicates, as well as a possible newline character which may get fixed onto the first line
  # (the unquoted expansions flatten each list onto one line; `tr' splits it back into one entry per line)
  Host_Domains=$(echo $(uniq     <<< $(echo $Host_Domains     | sed -n '1 p' | tr -d '\n'; echo $Host_Domains     | sed -n '2,$ p')) | tr ' ' '\n')
  Ethical_Hosts=$(echo $(uniq    <<< $(echo $Ethical_Hosts    | sed -n '1 p' | tr -d '\n'; echo $Ethical_Hosts    | sed -n '2,$ p')) | tr ' ' '\n')
  Unethical_Hosts=$(echo $(uniq  <<< $(echo $Unethical_Hosts  | sed -n '1 p' | tr -d '\n'; echo $Unethical_Hosts  | sed -n '2,$ p')) | tr ' ' '\n')
  Unreadable_Hosts=$(echo $(uniq <<< $(echo $Unreadable_Hosts | sed -n '1 p' | tr -d '\n'; echo $Unreadable_Hosts | sed -n '2,$ p')) | tr ' ' '\n')

  #  Ensure that we can access the webpage for each ethical host.                                            |
  #+ If we're not able to, then the resource is not freely available to us and will be labelled as unethical |
  for host in $(echo "$Ethical_Hosts")
  do
    # If the domain for the current player is a bad address, then fix it and update the $Host_Domains list
    domain=$(grep "$host" <<< "$Host_Domains" | gawk -F: '{ print $2 }')
    if [[ -z ${domain:-} || -z `grep "$host" <<< "$Host_Domains" | grep ':' | grep '\.'` ]]
    then
      # domain.log remembers the faulty entry so that it can be looked up again below
      [[ -f domain.log ]] && rm domain.log
      grep "$host" <<< "$Host_Domains" > domain.log
      domain=$(echo "$(sed '/<script.*/,/<meta.*/ d' $1 | sed -n "/>$host</,/src/ { /src/ P }" | eval $HOST_LOCATE)" |\
               grep -E 'http|www|com|io' | sed -n '1 p')
      [[ -z ${Host_Domains:-} ]] && Host_Domains=${host}:${domain} ||
      {
        [[ -n `grep "$host" <<< "$Host_Domains"` ]] &&
        Host_Domains=$(grep "$(< domain.log)" <<< "$Host_Domains" | sed "s,.*,${host}:${domain},") ||
        Host_Domains=$(echo "$Host_Domains"; echo "${host}:${domain}")
      }
    fi
    supposedly_ethical_link=$(grep 'file' $1 | grep "$domain" | sed -e 's_.*src="\(.*\)"_\1_' -e 's_".*__' -e 's_>__' -e 's_<.*>__g' |\
                              eval $BROWSER_READABLE | grep '^http' | sed -n '1 s/.*/&/p')

    # Attempt to download the page
    wget -q $supposedly_ethical_link -O "${domain//[0-9]*/static}.html"

    # If the page is not available: Edit the $Ethical_Hosts list, and append this host to the $Unethical_Hosts list
    # (wget exits with status 8 when the server issued an error response)
    if [[ $? = 8 ]]; then
      Ethical_Hosts=$(grep -v "$host" <<< "$Ethical_Hosts")
      Unethical_Hosts=$(echo "$Unethical_Hosts"; echo "$host")
    fi
  done

  # Inform the user how many players are available on the website, and list them (NOTE: I like to work with base 0)
  [[ ! -z ${Video_Players:-} ]] && local players=$(expr $(wc -l <<< "$Video_Players") '-' 1)
  if [[ ! -z    ${players:-} ]]; then
     [           $players = 0 ] && local plural1='player from a third-party site' ||
                                   local plural1='players from third-party sites'

    echo -e "\n${N}${R}[${FUNCNAME[0]}] ${B}$PROGNAME ${G}has detected ${M}$((players+1)) $plural1${G} hosted in: ${C}$1 ${N}"
    [[ ! -z ${WAIT:-} && $WAIT -le 1 ]] && : || sleep 2
    for (( j=$GNU; j<=$players; j++ )); do
      player=$(sed -n "$((j+1)) p" <<< "$Video_Players" | gawk -F: '{ print $1 }')
      host=$(  sed -n "$((j+1)) p" <<< "$Video_Players" | gawk -F: '{ print $2 }')
      echo -e "${N}${R}[${FUNCNAME[0]}] ${M}$player ${G}is: ${M}$host ${N}"
      [[ ! -z ${WAIT:-} && $WAIT -le 1 ]] && : || sleep 1
    done
  fi
  # Inform the user how many hosts are providing links that are obfuscated and can't be parsed
  [[ ! -z ${Unreadable_Hosts:-} ]] && local secret=$(expr $(wc -l <<< "$Unreadable_Hosts") '-' 1)
  if [[ ! -z        ${secret:-} ]]; then
     [               $secret = 0 ] && local plural1=is  plural2='host that is providing an embedded link that is' ||
                                      local plural1=are plural2='hosts that are providing embedded links that are'

    for (( j=$GNU; j<=$secret; j++ )); do
      player=$(sed -n "$((j+1)) p" <<< "$Unreadable_Hosts")
      domain=$(grep "$player" <<< "$Host_Domains" | gawk -F: '{ print $2 }')

      # If the domain for the current player is a bad address, then fix it and update the $Host_Domains list
      if [[ -z ${domain:-} || -z `grep "$player" <<< "$Host_Domains" | grep ':' | grep '\.'` ]]
      then
        [[ -f domain.log ]] && rm domain.log
        grep "$player" <<< "$Host_Domains" > domain.log
        domain=$(echo "$(sed '/<script.*/,/<meta.*/ d' $1 | sed -n "/>${player/_/ }</,/src/ { /src/ P }" | eval $HOST_LOCATE)" |\
                 grep -E 'http|www|com|io' | sed -n '1 p')
        [[ -z ${Host_Domains:-} ]] && Host_Domains=${player}:${domain} ||
        {
          [[ -n `grep "$player" <<< "$Host_Domains"` ]] &&
          Host_Domains=$(grep "$(< domain.log)" <<< "$Host_Domains"| sed "s,.*,${player}:${domain},") ||
          Host_Domains=$(echo "$Host_Domains"; echo "${player}:${domain}")
        }
      fi
      #  Double check and ensure whether the current player contains a valid path.  If so, then append          |
      #+ the name of the player to the list of ethical hosts, and then continue.  We will NOT report            |
      #+ to the user what kind of player this is, until we've verified whether a valid path can be found first! |
      if [[ ! -z `sed '/<script.*/,/<meta.*/ d' $1 | sed -n "/>${player/_/ }</,/src/ { /src/ P }" | grep 'file' | grep -v 'application'` ]]
      then
        [[ ! -z ${Ethical_Hosts:-} ]] && Ethical_Hosts=$(echo "$Ethical_Hosts"; echo "$player") || Ethical_Hosts=$player
        Unreadable_Hosts=$(echo "$Unreadable_Hosts" | grep -v "$player")
        secret=$(($secret-1))
        echo -e "${N}${R}[${FUNCNAME[0]}] ${G}\"Unreadable Host\" list ammended.  ${M}$player ${G}is not unreadable. ${N}"
        continue
      else
        # If the video address links to a Flash file, then attempt to find an alternative version                                               |
        # NOTE: Websites are finally starting to replace the obsolete Flash technology!  Gnash was avoided due to excessive failure with Flash. |
        video_link=$(grep "$domain" $1 | sed -e 's_.*src="\(.*\)"_\1_' -e 's_".*__' -e 's_>__' -e 's_.*file=\(.*\)_\1_' -e 's_<.*>__g')
        case $domain in
          www.dailymotion.com)
            # The host is promoted unless the metadata look-up answers 'object_not_found'
            video_id=$(echo $video_link | sed 's_.*/swf/\(.*\)_\1_')
            if [[ -z `grep 'object_not_found' <(curl http://$domain/player/metadata/video/$video_id 2>/dev/null)` ]]
            then
              [[ ! -z ${Ethical_Hosts:-} ]] && Ethical_Hosts=$(echo "$Ethical_Hosts"; echo "$player") || Ethical_Hosts=$player
              Unreadable_Hosts=$(echo "$Unreadable_Hosts" | grep -v "$player")
              secret=$(($secret-1))
              echo -e "${N}${R}[${FUNCNAME[0]}] ${G}\"Unreadable Host\" list ammended.  ${M}$player ${G}is not unreadable. ${N}"
              continue
            fi
          ;;
          www.veoh.com)
            # The host is promoted unless the mobile watch page answers 'No date provided'
            video_id=$(echo $video_link | sed 's/\&#038;videoAutoPlay=0//' | sed 's_.*permalinkId=\(.*\)_\1_')
            if [[ -z `grep 'No date provided' <(curl http://$domain/m/watch.php?v=$video_id 2> /dev/null)` ]]
            then
              [[ ! -z ${Ethical_Hosts:-} ]] && Ethical_Hosts=$(echo "$Ethical_Hosts"; echo "$player") || Ethical_Hosts=$player
              Unreadable_Hosts=$(echo "$Unreadable_Hosts" | grep -v "$player")
              secret=$(($secret-1))
              echo -e "${N}${R}[${FUNCNAME[0]}] ${G}\"Unreadable Host\" list ammended.  ${M}$player ${G}is not unreadable. ${N}"
              continue
            fi
          ;;
          *)
            # If there are any unethical/unreadable video links, then check to see if they can be parsed by <<youtube-dl>> before we deem them unreadable
            if [[ ! -z ${extract:-} ]] &&
               [[ ! -z `$extractors | grep "$(tr [[:upper:]] [[:lower:]] <<< "${player/_/ }")" | sed -n '1 p'` ||
                  ! -z `$extractors | grep "$(eval $GET_DOMAIN <<< "$domain" | tr [[:upper:]] [[:lower:]])" | sed -n '1 p'` ]]; then

              # Determine if `youtube-dl' can parse this link without errors
              # (declared separately: `local var=$(cmd)' would replace the exit status of cmd with that of `local')
              local warning
              warning=$($extract $video_link 2>&1)
              if [[ $? -ne 0 || -n `grep -E 'WARNING|ERROR' <<< "${warning:-}"` ]]; then

                # Report to the user all unreadable links
                player_number=$(echo "$Unreadable_Hosts" | grep -n '.*' | grep $player | gawk -F: '{ print $1 }')
                [[ $player_number -eq 1 ]] &&
                echo -e "\n${N}${R}[${FUNCNAME[0]}] ${G}There ${M}$plural1 $((secret+1)) $plural2 ${G}obfuscated and can't be parsed. ${N}  \b"
                [[ ! -z ${WAIT:-} && $WAIT -le 1 ]] && : || sleep 2
                echo -e "${N}${R}[${FUNCNAME[0]}] ${G}Unreadable Host ${M}$player_number ${G}is:\t\t ${M}$player ${G}at ${C}$domain ${N}"
                [[ ! -z ${WAIT:-} && $WAIT -le 1 ]] && : || sleep 1
                continue # some videos may be copy-restricted (often by DRM software), blocked, or deleted
              fi
              # We will loop through these variables once we've collected all python parsable links
              [[ ! -z ${python_parsable_players:-} ]] && local python_parsable_players=$(echo "$python_parsable_players"; echo "$player") ||
                                                         local python_parsable_players=$player
              [[ ! -z ${python_parsable_domains:-} ]] && local python_parsable_domains=$(echo "$python_parsable_domains"; echo "$domain") ||
                                                         local python_parsable_domains=$domain
              echo -e "${N}${R}[${FUNCNAME[0]}] ${G}\"Unreadable Host\" list ammended.  ${M}$player ${G}is not unreadable. ${N}"
            else
              # Report to the user all unreadable links
              player_number=$(echo "$Unreadable_Hosts" | grep -n '.*' | grep $player | gawk -F: '{ print $1 }')
              [[ $player_number -eq 1 ]] &&
              echo -e "\n${N}${R}[${FUNCNAME[0]}] ${G}There ${M}$plural1 $((secret+1)) $plural2 ${G}obfuscated and can't be parsed. ${N}  \b"
              [[ ! -z ${WAIT:-} && $WAIT -le 1 ]] && : || sleep 2
              echo -e "${N}${R}[${FUNCNAME[0]}] ${G}Unreadable Host ${M}$player_number ${G}is:\t\t ${M}$player ${G}at ${C}$domain ${N}"
              [[ ! -z ${WAIT:-} && $WAIT -le 1 ]] && : || sleep 1
              continue # some videos may be copy-restricted (often by DRM software), blocked, or deleted
            fi
          ;;
        esac
      fi
    done
  fi
  # Inform the user how many hosts are providing links that require you to install and use proprietary software
  [[ ! -z ${Unethical_Hosts:-} ]] && local proprietary=$(expr $(wc -l <<< "$Unethical_Hosts") '-' 1)
  if [[ ! -z  ${proprietary:-} ]]; then
     # ${USER^?} is the login name with its first character upper-cased
     [         $proprietary = 0 ] && local plural1=is  plural2="host that would require ${USER^?} to" ||
                                     local plural1=are plural2="hosts that would require ${USER^?} to"

    for (( j=$GNU; j<=$proprietary; j++ )); do
      player=$(sed -n "$((j+1)) p" <<< "$Unethical_Hosts")
      domain=$(grep "$player" <<< "$Host_Domains" | gawk -F: '{ print $2 }')

      # If the domain for the current player is a bad address, then fix it and update the $Host_Domains list
      if [[ -z ${domain:-} || -z `grep "$player" <<< "$Host_Domains" | grep ':' | grep '\.'` ]]
      then
        [[ -f domain.log ]] && rm domain.log
        grep "$player" <<< "$Host_Domains" > domain.log
        domain=$(echo "$(sed '/<script.*/,/<meta.*/ d' $1 | sed -n "/>${player/_/ }</,/src/ { /src/ P }" | eval $HOST_LOCATE)" |\
                 grep -E 'http|www|com|io' | sed -n '1 p')
        [[ -z ${Host_Domains:-} ]] && Host_Domains=${player}:${domain} ||
        {
          [[ -n `grep "$player" <<< "$Host_Domains"` ]] &&
          Host_Domains=$(grep "$(< domain.log)" <<< "$Host_Domains" | sed "s,.*,${player}:${domain},") ||
          Host_Domains=$(echo "$Host_Domains"; echo "${player}:${domain}")
        }
      fi
      # If the video address links to a Flash file, then attempt to find an alternative version                                               |
      # NOTE: Websites are finally starting to replace the obsolete Flash technology!  Gnash was avoided due to excessive failure with Flash. |
      video_link=$(grep "$domain" $1 | sed -e 's_.*src="\(.*\)"_\1_' -e 's_".*__' -e 's_>__' -e 's_.*file=\(.*\)_\1_' -e 's_<.*>__g')
      case $domain in
        www.dailymotion.com)
          # The host is promoted unless the metadata look-up answers 'object_not_found'
          video_id=$(echo $video_link | sed 's_.*/swf/\(.*\)_\1_')
          if [[ -z `grep 'object_not_found' <(curl http://$domain/player/metadata/video/$video_id 2>/dev/null)` ]]
          then
            [[ ! -z ${Ethical_Hosts:-} ]] && Ethical_Hosts=$(echo "$Ethical_Hosts"; echo "$player") || Ethical_Hosts=$player
            Unethical_Hosts=$(echo "$Unethical_Hosts" | grep -v "$player")
            proprietary=$(($proprietary-1))
            echo -e "${N}${R}[${FUNCNAME[0]}] ${G}\"Unethical Host\" list ammended.  ${M}$player ${G}is not proprietary. ${N}"
            continue
          fi
        ;;
        www.veoh.com)
          # The host is promoted unless the mobile watch page answers 'No date provided'
          video_id=$(echo $video_link | sed 's/\&#038;videoAutoPlay=0//' | sed 's_.*permalinkId=\(.*\)_\1_')
          if [[ -z `grep 'No date provided' <(curl http://$domain/m/watch.php?v=$video_id 2> /dev/null)` ]]
          then
            [[ ! -z ${Ethical_Hosts:-} ]] && Ethical_Hosts=$(echo "$Ethical_Hosts"; echo "$player") || Ethical_Hosts=$player
            Unethical_Hosts=$(echo "$Unethical_Hosts" | grep -v "$player")
            proprietary=$(($proprietary-1))
            echo -e "${N}${R}[${FUNCNAME[0]}] ${G}\"Unethical Host\" list ammended.  ${M}$player ${G}is not proprietary. ${N}"
            continue
          fi
        ;;
        *)
          # If there are any unethical/unreadable video links, then check to see if they can be parsed by `youtube-dl' before we deem them unreadable
          if [[ ! -z ${extract:-} ]] &&
             [[ ! -z `$extractors | grep "$(tr [[:upper:]] [[:lower:]] <<< "${player/_/ }")" | sed -n '1 p'` ||
                ! -z `$extractors | grep "$(eval $GET_DOMAIN <<< "$domain" | tr [[:upper:]] [[:lower:]])" | sed -n '1 p'` ]]; then

            # Determine if `youtube-dl' can parse this link without errors
            # (declared separately: `local var=$(cmd)' would replace the exit status of cmd with that of `local')
            local warning
            warning=$($extract $video_link 2>&1)
            if [[ $? -ne 0 || -n `grep -E 'WARNING|ERROR' <<< "${warning:-}"` ]]; then

              # Report to the user all unethical links                                                        |
              # NOTE: Don't be fooled by the first echo command-- NEVER install and use proprietary software! |
              player_number=$(echo "$Unethical_Hosts" | grep -n '.*' | grep $player | gawk -F: '{ print $1 }')
              [[ $player_number -eq 1 ]] &&
              echo -e "\n${N}${R}[${FUNCNAME[0]}] ${G}There ${M}$plural1 $((proprietary+1)) $plural2 ${G}install and use proprietary software. ${N}  \b"
              [[ ! -z ${WAIT:-} && $WAIT -le 1 ]] && : || sleep 2
              echo -e "${N}${R}[${FUNCNAME[0]}] ${G}Proprietary Host ${M}$player_number ${G}is:\t\t ${M}$player ${G}at ${C}$domain ${N}"
              [[ ! -z ${WAIT:-} && $WAIT -le 1 ]] && : || sleep 1
              continue # some videos may be copy-restricted (often by DRM software), blocked, or deleted
            fi
            # We will loop through these variables once we've collected all python parsable links
            [[ ! -z ${python_parsable_players:-} ]] && local python_parsable_players=$(echo "$python_parsable_players"; echo "$player") ||
                                                       local python_parsable_players=$player
            [[ ! -z ${python_parsable_domains:-} ]] && local python_parsable_domains=$(echo "$python_parsable_domains"; echo "$domain") ||
                                                       local python_parsable_domains=$domain
            echo -e "${N}${R}[${FUNCNAME[0]}] ${G}\"Unethical Host\" list ammended.  ${M}$player ${G}is not proprietary. ${N}"
          else
            # Report to the user all unethical links                                                        |
            # NOTE: Don't be fooled by the first echo command-- NEVER install and use proprietary software! |
            player_number=$(echo "$Unethical_Hosts" | grep -n '.*' | grep $player | gawk -F: '{ print $1 }')
            [[ $player_number -eq 1 ]] &&
            echo -e "\n${N}${R}[${FUNCNAME[0]}] ${G}There ${M}$plural1 $((proprietary+1)) $plural2 ${G}install and use proprietary software. ${N}  \b"
            [[ ! -z ${WAIT:-} && $WAIT -le 1 ]] && : || sleep 2
            echo -e "${N}${R}[${FUNCNAME[0]}] ${G}Proprietary Host ${M}$player_number ${G}is:\t\t ${M}$player ${G}at ${C}$domain ${N}"
            [[ ! -z ${WAIT:-} && $WAIT -le 1 ]] && : || sleep 1
            continue # some videos may be copy-restricted (often by DRM software), blocked, or deleted
          fi
        ;;
      esac
    done
  fi
  # Inform the user how many unethical/unreadable links can be parsed by `youtube-dl' via python
  [[ ! -z ${python_parsable_players:-} ]] && local yt_players=$(expr $(wc -l <<< "$python_parsable_players") '-' 1) python="${B}youtube-dl ${G}parsable"
  if [[ ! -z           ${yt_players:-} ]]; then
     [                  $yt_players = 0 ] && local plural1='player from a third-party site' ||
                                             local plural1='players from third-party sites'

    echo -e "\n${N}${R}[${FUNCNAME[0]}] ${B}$PROGNAME ${G}has detected ${M}$((yt_players+1)) $python ${M}$plural1${G} hosted in: ${C}$1 ${N}"
    [[ ! -z ${WAIT:-} && $WAIT -le 1 ]] && : || sleep 2
    for (( j=$GNU; j<=$yt_players; j++ )); do
      player=$(sed -n "$((j+1)) p" <<< "$python_parsable_players")
      domain=$(sed -n "$((j+1)) p" <<< "$python_parsable_domains")
      echo -e "${N}${R}[${FUNCNAME[0]}] ${G}Python Parsable Host ${M}$((j+1))${G}:\t\t ${M}$player ${G}at ${C}$domain ${N}"
      [[ ! -z ${WAIT:-} && $WAIT -le 1 ]] && : || sleep 1
    done
  fi
  # Inform the user how many ethical hosts are providing valid paths to their videos which can actually be read
  [[ ! -z ${Ethical_Hosts:-} ]] && local free=$(expr $(wc -l <<< "$Ethical_Hosts") '-' 1)
  if [[ ! -z       ${free:-} ]]; then
     [              $free = 0 ] && local plural1=is  plural2='host'  plural3=is ||
                                   local plural1=are plural2='hosts' plural3=are

    echo -e "\n${N}${R}[${FUNCNAME[0]}] ${G}There ${M}$plural1 $((free+1)) $plural2 ${G}that ${M}$plural3 ${G}providing a valid path to their video. ${N}  \b"
    [[ ! -z ${WAIT:-} && $WAIT -le 1 ]] && : || sleep 2
    for (( j=$GNU; j<=$free; j++ )); do
      player=$(sed -n "$((j+1)) p" <<< "$Ethical_Hosts")
      domain=$(grep "$player" <<< "$Host_Domains" | gawk -F: '{ print $2 }')

      #  Ensure that we can access the webpage for each ethical host.                                            |
      #+ If we're not able to, then the resource is not freely available to us and will be labelled as unethical |
      # NOTE(review): this inner loop reassigns $domain for every host, so the report below
      #+ prints the domain of the last host visited rather than that of $player -- confirm intent.
      for host in $(echo "$Ethical_Hosts")
      do
        # If the domain for the current player is a bad address, then fix it and update the Host_Domains list
        domain=$(grep "$host" <<< "$Host_Domains" | gawk -F: '{ print $2 }')
        if [[ -z ${domain:-} || -z `grep "$player" <<< "$Host_Domains" | grep ':' | grep '\.'` ]]
        then
          [[ -f domain.log ]] && rm domain.log
          grep "$player" <<< "$Host_Domains" > domain.log
          domain=$(echo "$(sed '/<script.*/,/<meta.*/ d' $1 | sed -n "/>${player/_/ }</,/src/ { /src/ P }" | eval $HOST_LOCATE)" |\
                   grep -E 'http|www|com|io' | sed -n '1 p')
          [[ -z ${Host_Domains:-} ]] && Host_Domains=${player}:${domain} ||
          {
            [[ -n `grep "$player" <<< "$Host_Domains"` ]] &&
            Host_Domains=$(grep "$(< domain.log)" <<< "$Host_Domains" | sed "s,.*,${player}:${domain},") ||
            Host_Domains=$(echo "$Host_Domains"; echo "${player}:${domain}")
          }
        fi
        # Whitelist `Dailymotion' and `Veoh' from our ethical link check
        # (both inequalities must hold; joined with `||' the test is true for every domain)
        if [[ $domain != 'www.dailymotion.com' && $domain != 'www.veoh.com' ]]; then
          supposedly_ethical_link=$(grep 'file' $1 | grep "$domain" | sed -e 's_.*src="\(.*\)"_\1_' -e 's_".*__' -e 's_>__' -e 's_<.*>__g' |\
                                    eval $BROWSER_READABLE | grep '^http' | sed -n '1 s/.*/&/p')

          # Attempt to download the page
          wget -q $supposedly_ethical_link -O "${domain//[0-9]*/static}.html" 2> /dev/null

          # If the page is not available: Edit the $Ethical_Hosts list, and append this host to the $Unethical_Hosts list
          if [[ $? = 8 ]]; then
            Ethical_Hosts=$(grep -v "$host" <<< "$Ethical_Hosts")
            Unethical_Hosts=$(echo "$Unethical_Hosts"; echo "$host")
          fi
        fi
      done
      # Ensure the domain exists; if not, move on to the next one
      [[ -z $domain ]] && continue

      # Report to the user all ethical links
      echo -e "${N}${R}[${FUNCNAME[0]}] ${G}Ethical Host ${M}$((j+1)) ${G}is:\t ${M}$player ${G}at ${C}$domain ${N}"
      [[ ! -z ${WAIT:-} && $WAIT -le 1 ]] && : || sleep 1
    done
    # If we've made it this far: Tell the user what our selection will be
    local selected_host=$(sed -n '1 p' <<< "$Ethical_Hosts")
    local selected_player=$(grep "$selected_host" <<< "$Video_Players" | gawk -F: '{ print $1 }' | sed -n '1 p')
    declare -g selected_domain=$(grep "$selected_host" <<< "$Host_Domains" | gawk -F: '{ print $2 }' | sed -n '1 p')
    echo -e "${N}${R}[${FUNCNAME[0]}] ${G}Selecting ${M}$selected_player${G}:\t ${M}$selected_host ${G}at ${C}$selected_domain ${N}"
    [[ ! -z ${WAIT:-} && $WAIT -le 1 ]] && : || sleep 2
  else
    if [[ ! -z ${python_parsable_players:-} ]]; then
      # If we've made it this far: Tell the user what our selection will be
      local python_host=$(sed -n '1 p' <<< "$python_parsable_players")
      local python_player=$(grep "$python_host" <<< "$Video_Players" | gawk -F: '{ print $1 }' | sed -n '1 p')
      declare -g python_domain=$(grep "$python_host" <<< "$Host_Domains" | gawk -F: '{ print $2 }' | sed -n '1 p')
      echo -e "${N}${R}[${FUNCNAME[0]}] ${G}No ethical hosts found!  ${B}$PROGNAME ${G}will use a python-parsable video link instead. ${N} \b"
      echo -e "${N}${R}[${FUNCNAME[0]}] ${G}Selecting ${M}$python_player${G}:\t ${M}$python_host ${G}at ${C}$python_domain ${N}"
    fi
  fi
  # Clean up the scratch file (this is the last command, so its status is the function's return status)
  [[ -f domain.log ]] && rm domain.log
}

# End:
# gfunc83120-extract-information.sh ends here
