File size: 522 Bytes
603b9d0 5440776 603b9d0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 |
#! /bin/sh
# Make sure each corpus repository is present in the current directory.
# A repository whose directory already exists is left as-is; a missing one
# is fetched as a shallow (depth 1) clone from the tibetan-nlp organisation.
for repo in classical-tibetan-corpus old-tibetan-corpus modern-tibetan-corpus
do
  if ! test -d "$repo"
  then
    git clone --depth=1 "https://github.com/tibetan-nlp/$repo"
  fi
done
# Build all.conllu: concatenate every corpus .conllu file except the
# *-translated.conllu ones, then drop each sentence (blank-line-delimited
# group of lines) that contains a field equal to NOTAG.
for F in *-tibetan-corpus/conllu/*.conllu
do
  # An unmatched glob is left as the literal pattern; skip it instead of
  # letting cat fail on a name that does not exist.
  test -f "$F" || continue
  case $F in
    *-translated.conllu) : ;;
    *)
      cat "$F"
      # Force a sentence break after every file so that a file missing its
      # trailing blank line cannot fuse its last sentence with the first
      # sentence of the next file. Surplus blank lines are ignored below.
      printf '\n'
      ;;
  esac
done | awk '
# Print the buffered sentence unless it is empty or has a NOTAG field.
# u already ends with a newline, so the newline added by print becomes
# the blank separator line that follows the sentence.
function emit() {
  if (u != "" && u !~ /\tNOTAG\t/)
    print u
  u = ""
}
{
  if ($0 == "")
    emit()
  else
    u = u $0 "\n"
}
# Flush a final sentence that was not terminated by a blank line.
END { emit() }
' > all.conllu
# Run the esupar training module, passing all.conllu as its last argument.
# NOTE(review): the meaning of each positional argument (two model names,
# 32, /tmp, input file) is defined by esupar.train; confirm against its docs.
python3 -m esupar.train \
  KoichiYasuoka/bert-base-tibetan \
  KoichiYasuoka/bert-base-tibetan-upos \
  32 \
  /tmp \
  all.conllu
# The script always reports success, whatever the training command returned.
exit 0
|