<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="2.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<!--This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>. -->
<!--Author of this style sheet: Ralf Herzog
    Date of creation: August 2010 
    License of this style-sheet: GPLv3 
    Usage: saxonb-xslt -ext:on -s:'/home/ubuntu/Documents/201008/german-0.2.1/german-0.2.1.xml' \
                               -xsl:'/home/ubuntu/Documents/201008/german-0.2.1/compare-s-0.2.1-s-dir.xsl' \
                               -o:'/home/ubuntu/Documents/201006/audacity/xaa-folder/dummy.xml'
    More info: http://spirit.blau.in/simon/2010/07/17/german-500-words-speech-models/ -->
    <!-- Serialization settings for the principal result document; the two
         xsl:result-document instructions in the lexicon template each carry
         their own method attribute. -->
    <xsl:output method="xml"/>
    
<xsl:template match="lexicon">

<!-- variables 201008 -->
 <!-- Section label; it is inserted into the header comment of the generated lexicon file. -->
 <xsl:variable name="traindir">xaae</xsl:variable>
 <!-- Whitespace-separated graphemes; a lexeme whose grapheme equals one of them is left out of both outputs. -->
 <xsl:variable name="unused-graphemes">Au Abb Betrieben Arbeitslose Betätigen diejenigen Ausbildungsmöglichkeiten Ausdehnungsmöglichkeiten Ausdrucksmöglichkeiten</xsl:variable>
 <!-- Whitespace-separated phoneme strings; a lexeme whose first phoneme equals one of them is left out of both outputs.
      The list is built in chunks: every following declaration shadows the previous binding and appends to it
      (shadowing a local variable is permitted in XSLT 2.0). -->
 <xsl:variable name="unused-words">ʔaʀbaɪ̯t͡smaʀktaphɛŋɪgɐ aʀbaɪ̯t͡sloːzəngɛltt͡svaɪ̯ɛmpfɛŋɐ aʀbaɪ̯t͡sloːzn̩ ʔaʀbaɪ̯t͡skaːləndɐ ʔaʀbaɪ̯t͡spapiːʀ ʔʀabat͡stafəl  ʔaʀbaɪ̯t͡skʀaftɪntɛnziːvəs ʔaʀbaɪ̯t͡sʀɛçtlɪçə aʀbaɪ̯t͡sʀɪçtɐ ʔabʔɛʀat͡sɪoːnst͡saɪ̯t aːɐ̯baɪ̯t͡sʀɛçt aːɐ̯baɪ̯t͡sfɛɪçkaɪ̯t ʔabɛnsbəʀk aɪ̯atɔlaː</xsl:variable><!--xaa-->
 <xsl:variable name="unused-words"><xsl:sequence select="$unused-words"/><xsl:text> </xsl:text>ʔabɛːzən ʔabəntɔɪ̯ɐfɪlm ʔaːaː ʔabaː abaʊ̯ ʔabbɪldʊŋ ʔʀaɪ̯bʊŋsfʀaɪ̯ ʔaʃaʊ̯ ʔaʀbaɪ̯t͡smaʀktaphɛŋɪgɐ ʔabatmən ʃʀaɪ̯bfœʀdəʀndɐ ʔaʀbaɪ̯t͡svyːtɪgɐ ʔaʀbaɪ̯tzaːməm ʔabɛndəʀə ʔadəltən  aʀbaɪ̯tɐpaʀtaɪ̯ ʔadaptɐlaɪ̯stən</xsl:variable>
 <xsl:variable name="unused-words"><xsl:sequence select="$unused-words"/><xsl:text> </xsl:text> ʃʀaʊ̯bənfœʀmɪgə ʀabiː ʔaɪ̯blmaɪ̯ɐ aʃafɛnbʊʀgɐ aɪ̯bən abaːs aʀbaɪ̯tn̩ ʔabatɪs ʃʀaʊ̯bənfœʀmɪgən ʔaːbəntʀɔt ʔaʀbaɪ̯t͡sbant abəntɔɪ̯ɐfɪlm ʔaʀbaɪ̯t͡sçɔɪ̯ ʃabloːniːɐ̯tɐ ʔabʔɛʀatɪoː ʔaʀbaɪ̯t͡sbeːzuːks aɪ̯çɪŋɐ abvəndən</xsl:variable>
 <xsl:variable name="unused-words"><xsl:sequence select="$unused-words"/><xsl:text> </xsl:text> aʊ̯ʀɪç mɪttʀaːgəndɐ mɪttʀaːgənt</xsl:variable><!--xab
 <xsl:variable name="unused-words"><xsl:sequence select="$unused-words"/><xsl:text> </xsl:text></xsl:variable>
 <xsl:variable name="unused-words"><xsl:sequence select="$unused-words"/><xsl:text> </xsl:text></xsl:variable>-->
 <!-- Final form of the list: one string, entries separated by a single space, each entry present once.
      Some entries are repeated across the chunks above; distinct-values() removes the repetitions so
      that a lookup in this list finds any given form at most once. -->
 <xsl:variable name="unused-words"
               select="string-join(distinct-values(tokenize(normalize-space($unused-words), ' ')), ' ')"/>

<!-- result lexicon-___.xml -->
 <!-- Writes the reduced lexicon as XML. A lexeme of the input lexicon is copied (grapheme plus first
      phoneme) once for every line of the prompts-wav-dir file that equals its first phoneme, unless
      the phoneme is listed in $unused-words or the grapheme is listed in $unused-graphemes. -->
 <xsl:result-document href="/home/ubuntu/Documents/201008/german-0.2.1/lexicon-speech-model-0.1.6.xml"
                      method="xml"><xsl:text>
</xsl:text>
<xsl:comment>    This program is free software; you can redistribute it and/or modify
        it under the terms of the GNU General Public License as published by
        the Free Software Foundation; either version 3 of the License, or
        (at your option) any later version.
        
        This program is distributed in the hope that it will be useful,
        but WITHOUT ANY WARRANTY; without even the implied warranty of
        MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
        GNU General Public License for more details.
        
        You should have received a copy of the GNU General Public License
        along with this program.  If not, see &lt;http://www.gnu.org/licenses/&gt;.  </xsl:comment><xsl:text>
</xsl:text><xsl:comment>    License of this XML file is GPLv3. 
        Read the full license text: http://script.blau.in/etc/GPL_License 
        This XML document covers section <xsl:sequence select="$traindir"></xsl:sequence></xsl:comment><lexicon>
 <!-- Loop-invariant data: read and split once instead of once per lexeme.
      The prompt file is split on LF, with an optional preceding CR so that CRLF files match as well. -->
 <xsl:variable name="prompt-lines"
               select="tokenize(unparsed-text('/home/ubuntu/Documents/201008/german-0.2.1/prompts-wav-dir'), '\r?\n')"/>
 <xsl:variable name="excluded-phonemes" select="tokenize($unused-words, '\s+')"/>
 <xsl:variable name="excluded-graphemes" select="tokenize($unused-graphemes, '\s+')"/>
 <xsl:for-each select="lexeme">
  <xsl:variable name="dictionary-grapheme" select="grapheme"/>
  <xsl:variable name="dictionary-phoneme" select="phoneme[1]"/>
  <!-- General comparison against the token sequence: true as soon as one token matches.
       The earlier flag concatenated one 'notok' per matching token and compared the result with
       'notok', so a form listed twice produced 'notoknotok' and slipped through the filter. -->
  <xsl:if test="not($dictionary-phoneme = $excluded-phonemes)
                and not($dictionary-grapheme = $excluded-graphemes)">
   <!-- One lexeme per matching prompt line, so a phoneme that occurs on several lines is emitted several times. -->
   <xsl:for-each select="$prompt-lines[. = $dictionary-phoneme]">
    <lexeme><xsl:text>
</xsl:text>
     <xsl:sequence select="$dictionary-grapheme"/><xsl:text>
 </xsl:text>
     <xsl:sequence select="$dictionary-phoneme"/>
    </lexeme><xsl:text>
</xsl:text>
   </xsl:for-each>
  </xsl:if>
 </xsl:for-each></lexicon>
 </xsl:result-document>

<!-- result prompts-___ 201008 -->
 <!-- Writes the reduced prompt list as plain text, one line per match:
      first phoneme, a space, the grapheme in upper case.
      A lexeme produces a line for every line of the prompts-wav-dir file that equals its first
      phoneme, unless the phoneme is listed in $unused-words or the grapheme in $unused-graphemes. -->
 <xsl:result-document href="/home/ubuntu/Documents/201008/german-0.2.1/prompts-reduced"
                      method="text">
  <!-- Loop-invariant data: read and split once instead of once per lexeme.
       The prompt file is split on LF, with an optional preceding CR so that CRLF files match as well. -->
  <xsl:variable name="prompt-lines"
                select="tokenize(unparsed-text('/home/ubuntu/Documents/201008/german-0.2.1/prompts-wav-dir'), '\r?\n')"/>
  <xsl:variable name="excluded-phonemes" select="tokenize($unused-words, '\s+')"/>
  <xsl:variable name="excluded-graphemes" select="tokenize($unused-graphemes, '\s+')"/>
  <xsl:for-each select="lexeme">
   <xsl:variable name="dictionary-grapheme" select="grapheme"/>
   <xsl:variable name="dictionary-phoneme" select="phoneme[1]"/>
   <!-- General comparison against the token sequence: true as soon as one token matches.
        The earlier flag concatenated one 'notok' per matching token and compared the result with
        'notok', so a form listed twice produced 'notoknotok' and slipped through the filter. -->
   <xsl:if test="not($dictionary-phoneme = $excluded-phonemes)
                 and not($dictionary-grapheme = $excluded-graphemes)">
    <!-- One output line per matching prompt line. -->
    <xsl:for-each select="$prompt-lines[. = $dictionary-phoneme]">
     <xsl:sequence select="$dictionary-phoneme"/><xsl:text> </xsl:text>
     <xsl:value-of select="upper-case($dictionary-grapheme)"/>
     <xsl:text>
</xsl:text>
    </xsl:for-each>
   </xsl:if>
  </xsl:for-each>
 </xsl:result-document>

</xsl:template>
</xsl:stylesheet>                 
                 

<!-- -->
<!--	This style-sheet is licensed under the GPLv3.
	    Read the full license text: http://script.blau.in/etc/GPL_License -->

