#!/bin/bash

# This script was used in the course of the work described by Read et al. (2012)
# to substitute Unicode quotes (‘ ’ “ ”) with straight ASCII quotes (' "; in the
# ascii subdirectory) or the directional variants used in LaTeX and the PTB
# (` ' `` ''; in the latex subdirectory).

#######################################
# Write quote-normalised copies of the text files in each corpus directory.
#
# For every corpus directory given, reads segmented.txt and unsegmented.txt
# and writes two variants of each:
#   <corpus>/ascii/<name>.txt  - curly quotes replaced by straight ' and "
#   <corpus>/latex/<name>.txt  - curly quotes replaced by ` ' `` ''
#
# Arguments: $@ - corpus directories (may contain spaces)
# Outputs:   the files described above; warnings on stderr
# Returns:   0 if every file was converted, 1 if anything was skipped/failed
#######################################
convert_quotes() {
    local corpus text src
    local status=0

    for corpus in "$@"; do
        if ! mkdir -p -- "$corpus/ascii" "$corpus/latex"; then
            printf 'warning: cannot create output directories in %s\n' \
                "$corpus" >&2
            status=1
            continue
        fi

        for text in segmented unsegmented; do
            src="$corpus/$text.txt"

            # Check before redirecting: the shell would otherwise create an
            # empty output file even though sed has nothing to read.
            if [[ ! -r "$src" ]]; then
                printf 'warning: skipping unreadable file %s\n' "$src" >&2
                status=1
                continue
            fi

            # Straight ASCII quotes.
            sed -e "s/‘/'/g" -e "s/’/'/g" -e 's/“/"/g' -e 's/”/"/g' \
                -- "$src" > "$corpus/ascii/$text.txt" || status=1

            # Directional quotes as written in LaTeX and the PTB.
            sed -e 's/‘/`/g' -e "s/’/'/g" -e 's/“/``/g' -e "s/”/''/g" \
                -- "$src" > "$corpus/latex/$text.txt" || status=1
        done
    done

    return "$status"
}

convert_quotes "$@"