#!/usr/bin/ruby

require 'getoptlong'
require 'statistics'
require 'perceptron'
require 'token'

# Command-line switches understood by the script, one row per switch:
# [long name, short name, argument policy].
option_table = [
  ["--help",          "-h", GetoptLong::NO_ARGUMENT],
  ["--learn",         "-l", GetoptLong::NO_ARGUMENT],
  ["--score",         "-s", GetoptLong::NO_ARGUMENT],
  ["--porn",          "-p", GetoptLong::OPTIONAL_ARGUMENT],
  ["--no-porn",       "-n", GetoptLong::OPTIONAL_ARGUMENT],
  ["--network",       "-k", GetoptLong::NO_ARGUMENT],
  ["--bayesian",      "-y", GetoptLong::NO_ARGUMENT],
  ["--net-params",    "-a", GetoptLong::REQUIRED_ARGUMENT],
  ["--file",          "-f", GetoptLong::REQUIRED_ARGUMENT],
  ["--directory",     "-d", GetoptLong::REQUIRED_ARGUMENT],
  ["--stdin",         "-i", GetoptLong::NO_ARGUMENT],
  ["--data-base",     "-b", GetoptLong::REQUIRED_ARGUMENT],
  ["--verbose",       "-v", GetoptLong::NO_ARGUMENT],
  ["--super-verbose", "-e", GetoptLong::NO_ARGUMENT]
]

opts = GetoptLong.new(*option_table)

# Translate every switch found on the command line into the instance
# variables read by the rest of the script. Boolean-like settings are
# stored as the strings "true"/"false"; a setting whose switch was never
# given stays unset (nil).
opts.each do |switch, value|
  case switch
  when "--help"          then @HELP  = "true"
  when "--learn"         then @LEARN = "true"
  when "--score"         then @LEARN = "false"
  when "--porn"
    # Class label for the input, plus the (optional) porn sample directory.
    @PORN = "true"
    @porn = value
  when "--no-porn"
    # Class label for the input, plus the (optional) non-porn sample directory.
    @PORN   = "false"
    @noporn = value
  when "--network"       then @BAYE = "false"
  when "--bayesian"      then @BAYE = "true"
  when "--net-params"    then @netparams = value
  when "--file"
    # Input source 1: a single file.
    @INPUT = 1
    @file  = value
  when "--directory"
    # Input source 2: every entry of a directory.
    @INPUT = 2
    @dir   = value
  when "--stdin"
    # Input source 3: standard input (takes no argument).
    @INPUT = 3
  when "--data-base"     then @db_name = value
  when "--verbose"       then @DISPLAY = 1
  when "--super-verbose" then @DISPLAY = 2
  end
end

# Returns the help screen as an array of lines (an empty string stands for
# a blank line). Kept separate from the printing so the text can be checked
# without running the script with -h.
def filter_help_lines
  [
    "###### This is the help message ######",
    "-h              will display this message",
    "-l              to choose the learning option",
    "-s              to choose the scoring option",
    "-p[directory?]  to indicate this is porn",
    "                add an argument when you are learning the network parameters",
    "-n[directory?]  to indicate this is not porn",
    "                add an argument when you are learning the network parameters",
    "-k              to indicate you want neural network method",
    "-y              to indicate you want bayesian calculations method",
    "-a              to indicate where to store/read the neural network parameters",
    "-f[file]        to indicate you want to parse a file",
    "-d[directory]   to indicate you want to parse a directory",
    "-i              to indicate you want to parse the standard input",
    "-b[dbname]      to indicate the location of the database",
    "-v              to print the name of the file on standard output",
    "-e              to print additional information when scoring",
    "",
    "Examples",
    # -y is required here: without it the learning step takes the neural
    # network branch instead of filling the token database.
    "$./filter.rb -l -y -p -d ../dbpornbis/porn/ -b database.db -v",
    "will store tokens in database.db from files contained in ../dbpornbis/porn/ which are porn and will display the name of the files parsed, will create database.db if it doesn't exist",
    "$wget -O - http://www.website.com/| ./filter.rb -s -y -i -b database.db",
    "will score the webpage given on standard input using bayesian calculations and database.db",
    "$./filter.rb -l -n ../dbporn/noporn/learning/ -p ../dbporn/porn/learning/ -b database.db -a mynetwork -v",
    "will learn the neural network params using ../dbporn/noporn/learning/ as non porn directory, ../dbporn/porn/learning/ as porn directory and database.db. Params will be stored in mynetwork",
    # -b and -a both take a mandatory argument, so each value must directly
    # follow its own switch.
    "$./filter.rb -s -k -f dir/index.html -b database.db -a network -e",
    "will score index.html using neural network with params from network and with database.db. Will also display the tokens found as well as their probabilities"
  ]
end

# Print the help screen when -h/--help was given. The script does not exit
# afterwards; the learning/scoring section below still runs.
if @HELP == "true"
  puts filter_help_lines
end

######## HELPERS SHARED BY LEARNING AND SCORING ########

# Reads the whole content of +path+ in one go and closes the file
# afterwards. Returns nil when the file is empty (IO#gets at end of file).
def read_whole_file(path)
  File.open(path, 'r') { |io| io.gets(nil) }
end

# Walks every entry of +dir+ and yields its full path. In verbose mode the
# entry name is printed first. Any error raised while handling one entry
# (including entries that cannot be read as a file) is swallowed so the
# walk continues; the error is printed only in verbose mode.
# File.join makes the walk work whether or not +dir+ ends with a slash.
def each_directory_entry(dir)
  Dir.foreach(dir) do |entry|
    begin
      puts entry if @DISPLAY == 1
      yield File.join(dir, entry)
    rescue => error
      puts error if @DISPLAY == 1
    end
  end
end

# Tokenizes +text+ and records its tokens in +db+ under the class label
# selected on the command line (@PORN).
def learn_tokens_from(db, text)
  db.updateHash(Token.new(text).get_all(), @PORN)
end

# Scores +text+ with the method selected on the command line.
# Returns [scorer, probability], where scorer is the Stats or Network
# object that produced the probability.
def score_content(text)
  if @BAYE == "true"
    # WITH BAYESIAN CALCULATIONS #
    scorer = Stats.new(text, @db_name)
    [scorer, scorer.getProbabilityFile("false")]
  else
    # WITH NEURAL NETWORK #
    scorer = Network.new(@db_name, @netparams)
    [scorer, scorer.workNetwork(text)]
  end
end

# Prints the optional details requested by -e (getInfo) or -v (getHisto),
# followed by the probability itself.
def print_score_report(scorer, proba)
  puts scorer.getInfo() if @DISPLAY == 2
  puts scorer.getHisto() if @DISPLAY == 1
  puts proba
end

######## FOR LEARNING ########
if @LEARN == "true"

  # FOR BAYESIAN DATABASE #
  if @BAYE == "true"
    begin
      db = Hashdb.new(@db_name, 'r+')
    rescue # the database file does not exist yet
      db = Hashdb.new(@db_name, 'w')
    end
    case @INPUT
    when 1
      puts @file if @DISPLAY == 1
      learn_tokens_from(db, read_whole_file(@file))
    when 2
      each_directory_entry(@dir) { |path| learn_tokens_from(db, read_whole_file(path)) }
    when 3
      learn_tokens_from(db, $stdin.gets(nil))
    end

  # FOR NEURONAL PARAMETERS #
  else
    @sortiePorn   = File.open('neuronesPorn.txt', 'w')
    @sortieNoPorn = File.open('neuronesNoPorn.txt', 'w')
    @net = Network.new(@db_name, @netparams)

    # One feature vector per sample file, written to the matching output file.
    each_directory_entry(@porn) do |path|
      @sortiePorn.puts @net.getVector(read_whole_file(path))
    end
    each_directory_entry(@noporn) do |path|
      @sortieNoPorn.puts @net.getVector(read_whole_file(path))
    end

    # NOTE(review): both handles are still open in write mode and were never
    # explicitly flushed or closed at this point; confirm that
    # learnFromVectorFile expects them in that state.
    @net.learnFromVectorFile(@sortiePorn, @sortieNoPorn)
  end

######## FOR SCORING ########
else
  case @INPUT

  # FOR A FILE #
  when 1
    puts @file if @DISPLAY == 1
    scorer, proba = score_content(read_whole_file(@file))
    print_score_report(scorer, proba)

  # FOR A DIRECTORY #
  when 2
    # Only the probability is printed per entry; no -e/-v details here.
    each_directory_entry(@dir) do |path|
      _scorer, proba = score_content(read_whole_file(path))
      puts proba
    end

  # FOR STANDARD INPUT #
  when 3
    scorer, proba = score_content($stdin.gets(nil))
    # Same report as for a single file, so -e now works for stdin as well.
    print_score_report(scorer, proba)
  end
end


 
