The .prx format

Files of type .prx (prosodic annotation) comprise a chronological representation of the prosodic annotation in an XML text format. The structure of this format is described in prtext.dtd on the annotation DVD. The .prx files can be found in /data/annot/xml/prx1 and /data/annot/xml/prx2 on the annotation DVD.

<?xml version="1.0"?>
<!DOCTYPE prtext SYSTEM "prtext.dtd">
<prtext ref="fn123456">
<prau ref="fn123456.1" s="N02008">
  <prw ref="fn123456.1.1"       w="je"         annot="je"
     nprom="0"          nlength="0"     nweakb="0"      nstrongb="0"
     tbeg="157.056"     tend="160.988"  leftb="auto"    rightb="none"/>
  <prw ref="fn123456.1.2"       w="kunt"       annot="kunt"
     nprom="0"          nlength="0"     nweakb="0"      nstrongb="0"
     tbeg="157.056"     tend="160.988"  leftb="none"    rightb="none"/>
  <prw ref="fn123456.1.3"       w="ook"        annot="ook"
     nprom="0"          nlength="0"     nweakb="0"      nstrongb="0"
     tbeg="157.056"     tend="160.988"  leftb="none"    rightb="none"/>
  <prw ref="fn123456.1.4"       w="ha"         annot="ha"
     nprom="0"          nlength="0"     nweakb="0"      nstrongb="0"
     tbeg="157.056"     tend="160.988"  leftb="none"    rightb="none"/>
  <prw ref="fn123456.1.5"       w="tegen"      annot="tegen"
     nprom="0"          nlength="0"     nweakb="0"      nstrongb="0"
     tbeg="157.056"     tend="160.988"  leftb="none"    rightb="none"/>
  <prw ref="fn123456.1.6"       w="haar"       annot="haar"
     nprom="0"          nlength="0"     nweakb="0"      nstrongb="0"
     tbeg="157.056"     tend="160.988"  leftb="none"    rightb="none"/>
  <prw ref="fn123456.1.7"       w="zeggen"     annot="z^e^ggen"
     nprom="1"          nlength="0"     nweakb="0"      nstrongb="0"
     tbeg="157.056"     tend="160.988"  leftb="none"    rightb="none"/>
  <prw ref="fn123456.1.8"       w="dat"        annot="dat"
     nprom="0"          nlength="0"     nweakb="0"      nstrongb="0"
     tbeg="157.056"     tend="160.988"  leftb="none"    rightb="none"/>
  <prw ref="fn123456.1.9"       w="ik"         annot="ik"
     nprom="0"          nlength="0"     nweakb="0"      nstrongb="0"
     tbeg="157.056"     tend="160.988"  leftb="none"    rightb="none"/>
  <prw ref="fn123456.1.10"      w="best"       annot="best"
     nprom="0"          nlength="0"     nweakb="0"      nstrongb="0"
     tbeg="157.056"     tend="160.988"  leftb="none"    rightb="none"/>
  <prw ref="fn123456.1.11"      w="bereid"     annot="bereid"
     nprom="0"          nlength="0"     nweakb="0"      nstrongb="0"
     tbeg="157.056"     tend="160.988"  leftb="none"    rightb="none"/>
  <prw ref="fn123456.1.12"      w="ben"        annot="ben"
     nprom="0"          nlength="0"     nweakb="0"      nstrongb="0"
     tbeg="157.056"     tend="160.988"  leftb="none"    rightb="none"/>
  <prw ref="fn123456.1.13"      w="een"        annot="een"
     nprom="0"          nlength="0"     nweakb="0"      nstrongb="0"
     tbeg="157.056"     tend="160.988"  leftb="none"    rightb="none"/>
  <prw ref="fn123456.1.14"      w="uh"         annot="uh"
     nprom="0"          nlength="0"     nweakb="0"      nstrongb="0"
     tbeg="157.056"     tend="160.988"  leftb="none"    rightb="weak"/>
  <prw ref="fn123456.1.15"      w="glansrol"   annot="gl^a^ns|r^o^l"
     nprom="2"          nlength="0"     nweakb="1"      nstrongb="0"
     tbeg="157.056"     tend="160.988"  leftb="weak"    rightb="auto"/>
  <prl ref="fn123456.1.16"      w="..."/>
</prau>
<prau ref="fn123456.2" s="N02008">
  <prw ref="fn123456.2.1"       w="vind"        annot="v^i^nd"
     nprom="1"          nlength="0"     nweakb="0"      nstrongb="0"
     tbeg="454.704"     tend="457.756"  leftb="strong"  rightb="none"/>
  <prw ref="fn123456.2.2"       w="je"          annot="je"
     nprom="0"          nlength="0"     nweakb="0"      nstrongb="0"
     tbeg="454.704"     tend="457.756"  leftb="none"    rightb="none"/>
  <prw ref="fn123456.2.3"       w="nou"         annot="nou"
     nprom="0"          nlength="0"     nweakb="0"      nstrongb="0"
     tbeg="454.704"     tend="457.756"  leftb="none"    rightb="none"/>
  <prw ref="fn123456.2.4"       w="dat"         annot="dat"
     nprom="0"          nlength="0"     nweakb="0"      nstrongb="0"
     tbeg="454.704"     tend="457.756"  leftb="none"    rightb="none"/>
  <prw ref="fn123456.2.5"       w="je"          annot="je"
     nprom="0"          nlength="0"     nweakb="0"      nstrongb="0"
     tbeg="454.704"     tend="457.756"  leftb="none"    rightb="none"/>
  <prw ref="fn123456.2.6"       w="kan"        annot="kan"
     nprom="0"          nlength="0"     nweakb="0"      nstrongb="0"
     tbeg="454.704"     tend="457.756"  leftb="none"    rightb="none"/>
  <prw ref="fn123456.2.7"       w="zien"       annot="z^ie^%n%"
     nprom="1"          nlength="1"     nweakb="0"      nstrongb="0"
     tbeg="454.704"     tend="457.756"  leftb="none"    rightb="weak"/>
  <prw ref="fn123456.2.8"       w="dat"        annot="dat"
     nprom="0"          nlength="0"     nweakb="0"      nstrongb="0"
     tbeg="454.704"     tend="457.756"  leftb="weak"    rightb="none"/>
  <prw ref="fn123456.2.9"       w="zij"        annot="zij"
     nprom="0"          nlength="0"     nweakb="0"      nstrongb="0"
     tbeg="454.704"     tend="457.756"  leftb="none"    rightb="none"/>
  <prw ref="fn123456.2.10"      w="toneel"     annot="ton^ee^l"
     nprom="1"          nlength="0"     nweakb="0"      nstrongb="0"
     tbeg="454.704"     tend="457.756"  leftb="none"    rightb="auto"/>
  <prw ref="fn123456.2.11"      w="doet"       annot="doet"
     nprom="0"          nlength="0"     nweakb="0"      nstrongb="0"
     tbeg="458.499"     tend="458.724"  leftb="auto"    rightb="auto"/>
  <prw ref="fn123456.2.12"      w="in"         annot="in"
     nprom="0"          nlength="0"     nweakb="0"      nstrongb="0"
     tbeg="459.499"     tend="463.431"  leftb="auto"    rightb="none"/>
  <prw ref="fn123456.2.13"      w="haar"       annot="haar"
     nprom="0"          nlength="0"     nweakb="0"      nstrongb="0"
     tbeg="459.499"     tend="463.431"  leftb="none"    rightb="none"/>
  <prw ref="fn123456.2.14"      w="in"         annot="in"
     nprom="0"          nlength="0"     nweakb="0"      nstrongb="0"
     tbeg="459.499"     tend="463.431"  leftb="none"    rightb="none"/>
  <prw ref="fn123456.2.15"      w="haar"       annot="haar"
     nprom="0"          nlength="0"     nweakb="0"      nstrongb="0"
     tbeg="459.499"     tend="463.431"  leftb="none"    rightb="none"/>
  <prw ref="fn123456.2.16"      w="privé-s-uh-appearance"
     annot="privé-s-||uh-app^ea^rance"
     nprom="1"          nlength="0"     nweakb="0"      nstrongb="1"
     tbeg="459.499"     tend="463.431"  leftb="none"    rightb="weak"/>
  <prw ref="fn123456.2.17"      w="zeg"        annot="zeg"
     nprom="0"          nlength="0"     nweakb="0"      nstrongb="0"
     tbeg="459.499"     tend="463.431"  leftb="weak"    rightb="none"/>
  <prw ref="fn123456.2.18"      w="maar"       annot="maar"
     nprom="0"          nlength="0"     nweakb="0"      nstrongb="0"
     tbeg="459.499"     tend="463.431"  leftb="none"    rightb="auto"/>
  <prl ref="fn123456.2.19"      w="?"/>
</prau>
</prtext>

<prtext> text with a prosodic annotation.
<prau> an annotation unit. The boundaries of this element are determined by the punctuation mark.
<prw> a word within an annotation unit (<prau>).
<prl> the punctuation mark within an annotation unit (<prau>). There are three possible values for this element: ".", "..." or "?".
<prmu> a mark-up unit which may contain COMMENT or BACKGROUND information.
<prm> a marker within the mark-up unit (<prmu>).
ref The identification code is composed of one, two or three parts (depending on the element with which it is associated) which are separated by a full stop. The meaning is as follows:
<sample number, rank number>.<annotation unit, rank number>.<word/marker/punctuation mark, rank number>
s speaker identification. In the context of the <prau> element, possible values for this attribute are: Nxxxxx, Vxxxxx or UNKNOWN, where x denotes a digit.
w word form as it occurs in the orthographic transcription (cf. the data in the .ort files)
annot prosodic annotation which has been assigned to the word form.
nprom number of prominent syllables in the word form.
nlength number of lengthened sounds in the word form.
nweakb number of weak boundaries in the word form.
nstrongb number of strong boundaries in the word form.
tbeg time marker of the beginning of <prau>.
tend time marker of the end of <prau>.
leftb/rightb nature of the left/right boundary. The following values are possible: 
auto : prosodic boundary has been placed by the machine.
none : no prosodic boundary.
weak : the prosodic boundary is marked as weak ("|").
strong : the prosodic boundary is marked as strong ("||").

All characters used from the ISO 8859-1 character set that fall outside the 7-bit range have been translated according to the Character entity references for ISO 8859-1 characters. The subset of special characters that were used can be found in ttext.dtd on the annotation DVD. In entities.htm an overview is presented of the various standards for this character (sub)set.