#!/bin/sh
# hent-ordbank – Hent, pakk ut og gjer ordbankfilene klare til bruk.
#
# Copyright © 2008, 2009 Karl Ove Hufthammer <karl@huftis.org>.
#
#     This file is part of Ordbanken.
#
#     Ordbanken is free software: you can redistribute it and/or modify
#     it under the terms of the GNU General Public License as published by
#     the Free Software Foundation, either version 3 of the License, or
#     (at your option) any later version.
#
#     This program is distributed in the hope that it will be useful,
#     but WITHOUT ANY WARRANTY; without even the implied warranty of
#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#     GNU General Public License for more details.
#
#     You should have received a copy of the GNU General Public License
#     along with this program.  If not, see <http://www.gnu.org/licenses/>.

# Print a message on stderr and stop the script with a failure status.
# Every step below is followed by a destructive one (in-place re-encoding,
# deleting the archives), so a failed step must never be allowed to fall
# through to the next.
avbryt() {
  printf 'hent-ordbank: %s\n' "$*" >&2
  exit 1
}

# Extract the archive named by $1 into the current directory.
#   -o       overwrite existing files without asking
#   -a -aa   convert extracted files as text, treating every member as text
# Info-ZIP unzip reports "completed, but with warnings" as exit status 1;
# only a status above 1 (or a missing unzip, 127) is treated as fatal.
pakk_ut() {
  unzip -o -a -aa "$1"
  unzip_status=$?
  [ "$unzip_status" -le 1 ] || avbryt "could not unpack $1 (unzip status $unzip_status)"
}

# The archives and the extracted word-bank files live one level above the
# directory the script is started from.
cd .. || avbryt "could not change to the parent directory"

# Download the files manually from http://www.edd.uio.no/prosjekt/ordbanken/data/index.html
# wget http://www.edd.uio.no/prosjekt/ordbanken/data/ordbank_bm.zip
# wget http://www.edd.uio.no/prosjekt/ordbanken/data/ordbank_nn.zip
# wget http://www.edd.uio.no/prosjekt/ordbanken/dataformat.txt

# Unpack the files and overwrite the old ones.
pakk_ut ordbank_bm.zip
pakk_ut ordbank_nn.zip

# Re-encode the files to UTF-8 (in place).
# The "./" prefix keeps a file name that starts with "-" from being read as
# an option, and the -e test skips the literal pattern when nothing matches.
for fil in ./*.txt
do
  [ -e "$fil" ] || continue
  recode latin1..utf8 "$fil" || avbryt "could not re-encode $fil"
done

# Sort the files.
# Run in a subshell so the working directory of the main script is left
# unchanged whether or not the step succeeds.
( cd skript && ./sorter ) || avbryt "sorting failed"

# Remove unneeded files. Only reached when every step above succeeded, so
# the archives are kept for a retry after a failure.
rm -f -- *.zip
