简体   繁体   中英

how to formulate english graphemes from a string in Matlab by reducing time complexity?

I've been working in the grapheme to phoneme conversion in Matlab and trying to produce a more generalized code to first break the word into the particular consonents,digraphs and their related vowels and segment each inputted string (word) into its grapheme form so that it can later produce the appropriate phonetic synthesis. however, due to the presence of a large number of rules and hence a huge loads of if-elseif-else, looping through each letter and presence of growing arrays, the time complexity has increased manifold and is not giving any result (it's showing busy, everytime I enter a string input through the console). So if you could please help me simplify the code, here's below:

prompt='Enter a string: ';
str=input(prompt,'s');
l=length(str);
grapheme=[];
i=1;

while (i<=l)

        if strcmpi(str(i),'b')  
            grapheme=[grapheme;{'b'}];
        elseif strcmpi(str(i:i+1),'bb')
            grapheme=[grapheme;{'b'}];
            i=i+1;
        elseif strcmpi(str(i),'d') 
            grapheme=[grapheme;{'d'}];
        elseif strcmpi(str(i:i+1),'dd')||strcmpi(str(i:i+1),'ed')
            grapheme=[grapheme;{'d'}];
            i=i+1;
        elseif strcmpi(str(i),'f')  
            grapheme=[grapheme;{'f'}];
        elseif strcmpi(str(i:i+1),'ff')||strcmpi(str(i:i+1),'ph')||strcmpi(str(i:i+1),'gh')
            grapheme=[grapheme;{'f'}];
            i=i+1;
        elseif strcmpi(str(i),'g')
            grapheme=[grapheme;{'g'}];          
        elseif  strcmpi(str(i:i+1),'gg')%||strcmpi(str(i:i+2),'gue')
           grapheme=[grapheme;{'g'}]; 
           i=i+1;
        elseif strcmpi(str(i),'h')
          grapheme=[grapheme;{'h'}];  
        elseif  strcmpi(str(i:i+1),'wh') && strcmpi(str(i+2),'o')
           grapheme=[grapheme;{'h'}]; 
           i=i+1;
        elseif strcmpi(str(i),'j')
            grapheme=[grapheme;{'j'}];  
        elseif strcmpi(str(i:i+1),'ge')||strcmpi(str(i:i+1),'jj')
            grapheme=[grapheme;{'j'}];
            i=i+1;
%         elseif strcmpi(str(i:i+2),'dge')
%            grapheme=[grapheme;{'j'}];
%            i=i+2;
        elseif strcmpi(str(i),'k')||strcmpi(str(i),'c')
            grapheme=[grapheme;{'k'}]; 
        elseif strcmpi(str(i),'ck')||strcmpi(str(i),'cc')||strcmpi(str(i),'qu')
            grapheme=[grapheme;{'k'}]; 
            i=i+1;
        elseif strcmpi(str(i),'l')
            grapheme=[grapheme,{'l'}];
        elseif strcmpi(str(i),'ll')||strcmpi(str(i),'le')
            grapheme=[grapheme,{'l'}];
           i=i+1;
        elseif strcmpi(str(i),'m')
            grapheme=[grapheme,{'m'}];
        elseif strcmpi(str(i),'mm')||strcmpi(str(i),'lm')||strcmpi(str(i),'mn')
            grapheme=[grapheme;{'m'}]; 
            i=i+1;
       elseif strcmpi(str(i),'n')
            grapheme=[grapheme,{'n'}];
       elseif strcmpi(str(i),'nn')||strcmpi(str(i),'kn')||strcmpi(str(i),'pn')||strcmpi(str(i),'gn')
            grapheme=[grapheme;{'n'}]; 
            i=i+1;
        elseif strcmpi(str(i),'p')
            grapheme=[grapheme;{'p'}];
        elseif strcmpi(str(i),'pp')
            grapheme=[grapheme;{'p'}];
            i=i+1;
        elseif strcmpi(str(i),'r')
            grapheme=[grapheme;{'r'}];
        elseif strcmpi(str(i),'rr')||strcmpi(str(i),'wr')||strcmpi(str(i),'rh')
            grapheme=[grapheme;{'r'}]; 
            i=i+1;
        elseif strcmpi (str(i),'s')
            grapheme=[grapheme;{'s'}];
         elseif strcmpi(str(i),'ss')||strcmpi(str(i),'sc')||strcmpi(str(i),'ce')||strcmpi(str(i),'se')||strcmpi(str(i),'ps')
            grapheme=[grapheme;{'s'}]; 
            i=i+1; 
         elseif strcmpi (str(i),'t')
            grapheme=[grapheme;{'t'}];
         elseif strcmpi(str(i),'tt')|| strcmpi(str(i),'te')
            grapheme=[grapheme;{'t'}]; 
            i=i+1; 
         elseif strcmpi (str(i),'v')
            grapheme=[grapheme;{'v'}];
         elseif strcmpi(str(i),'ve')
            grapheme=[grapheme;{'v'}]; 
            i=i+1;
         elseif strcmpi (str(i),'v')
            grapheme=[grapheme;{'v'}];
         elseif strcmpi(str(i),'ve')
            grapheme=[grapheme;{'v'}]; 
            i=i+1;
         elseif strcmpi (str(i),'w')
            grapheme=[grapheme;{'w'}];
         elseif strcmpi(str(i),'wh')
            grapheme=[grapheme;{'w'}]; 
            i=i+1;
         elseif strcmpi(str(i),'x')
            grapheme=[grapheme;{'x'}];
        elseif strcmpi (str(i),'y') && strcmp('I')
            grapheme=[grapheme;{'y'}];
        elseif strcmpi(str(i),'z')
            grapheme=[grapheme;{'z'}];
        elseif strcmpi(str(i),'zz')||strcmpi(str(i),'ze')
            grapheme=[grapheme;{'z'}]; 
            i=i+1;
          elseif strcmpi(str(i),'sh')
            grapheme=[grapheme;{'sh'}]; 
            i=i+1;
         elseif strcmpi(str(i),'zh')
            grapheme=[grapheme;{'z'}]; 
            i=i+1;
         elseif strcmpi(str(i),'ch')
            grapheme=[grapheme;{'ch'}]; 
            i=i+1;
         elseif strcmpi(str(i),'th')
            grapheme=[grapheme;{'th'}]; 
            i=i+1;
         elseif strcmpi(str(i),'a')
            grapheme=[grapheme;{'a'}]; 

         elseif strcmpi(str(i),'e')
            grapheme=[grapheme;{'e'}]; 
         elseif strcmpi(str(i),'i')
            grapheme=[grapheme;{'i'}]; 
         elseif strcmpi(str(i),'o')
            grapheme=[grapheme;{'o'}]; 
         elseif strcmpi(str(i),'u')
            grapheme=[grapheme;{'u'}];        
       end
       end
display(grapheme);
prompt = 'Enter a string : ';
`enter code here`str = input(prompt,'s');
l=length(str);
i=1;
f=0;  
grapheme=[]; %matrix to store the graphemes

    enter code here

if (l==1)
    if strcmp(str(i),'I')
            grapheme=[grapheme;{'ie'}];
    else
    grapheme=[grapheme;{str(l)}];
    end
else
while i<=l-1
   if double(str(i))==32
      grapheme=[grapheme;{'*'}];
   elseif strcmp(str(i),'I')
            grapheme=[grapheme;{'ie'}];

   elseif strcmpi(str(i:i+1),'sh')
       grapheme=[grapheme;{'sh'}];
       i=i+1;
   elseif strcmpi(str(i:i+1),'ee')
       grapheme=[grapheme;{'ee'}];
       i=i+1;
    elseif strcmpi(str(i:i+1),'ea')
       grapheme=[grapheme;{'ee'}];
       i=i+1;
    elseif strcmpi(str(i:i+1),'oa')
       grapheme=[grapheme;{'oa'}];
       i=i+1;
    elseif strcmpi(str(i:i+1),'ou')
       grapheme=[grapheme;{'ou'}];
       i=i+1;
    elseif strcmpi(str(i:i+1),'oo')
       grapheme=[grapheme;{'oo'}];
       i=i+1;
    elseif strcmpi(str(i:i+1),'er')
       grapheme=[grapheme;{'er'}];
       i=i+1;
    elseif strcmpi(str(i:i+1),'bb')
       grapheme=[grapheme;{'b'}];
       i=i+1;

     elseif strcmpi(str(i:i+1),'ll')||strcmpi(str(i:i+1),'le')
       grapheme=[grapheme;{'l'}];
       i=i+1;
     elseif strcmpi(str(i:i+1),'ff')
       grapheme=[grapheme;{'f'}];
       i=i+1;
     elseif strcmpi(str(i:i+1),'dd')||strcmpi(str(i:i+1),'ed')
            grapheme=[grapheme;{'d'}];
            i=i+1;
     elseif strcmpi(str(i:i+1),'ff')||strcmpi(str(i:i+1),'ph')||strcmpi(str(i:i+1),'gh')
            grapheme=[grapheme;{'f'}];
     elseif  strcmpi(str(i:i+1),'gg')%||strcmpi(str(i:i+2),'gue')
           grapheme=[grapheme;{'g'}]; 
           i=i+1; 
     elseif strcmpi(str(i:i+1),'ge')||strcmpi(str(i:i+1),'jj')
            grapheme=[grapheme;{'j'}];
            i=i+1;
    elseif strcmpi(str(i:i+1),'ck')||strcmpi(str(i:i+1),'cc')||strcmpi(str(i:i+1),'qu')
            grapheme=[grapheme;{'c'}];
            i=i+1;
     elseif strcmpi(str(i:i+1),'mm')||strcmpi(str(i:i+1),'lm')||strcmpi(str(i:i+1),'mn')
            grapheme=[grapheme;{'m'}]; 
            i=i+1;
     elseif strcmpi(str(i:i+1),'nn')||strcmpi(str(i:i+1),'kn')||strcmpi(str(i:i+1),'pn')||strcmpi(str(i:i+1),'gn')
            grapheme=[grapheme;{'n'}]; 
            i=i+1;
     elseif strcmpi(str(i:i+1),'pp')
            grapheme=[grapheme;{'p'}];
            i=i+1;
     elseif strcmpi(str(i:i+1),'rr')||strcmpi(str(i:i+1),'wr')||strcmpi(str(i:i+1),'rh')
            grapheme=[grapheme;{'r'}]; 
            i=i+1
      elseif strcmpi(str(i:i+1),'ss')||strcmpi(str(i:i+1),'sc')||strcmpi(str(i:i+1),'ce')||strcmpi(str(i:i+1),'se')||strcmpi(str(i:i+1),'ps')
            grapheme=[grapheme;{'s'}]; 
            i=i+1; 
      elseif strcmpi(str(i:i+1),'tt')|| strcmpi(str(i),'te')
            grapheme=[grapheme;{'t'}]; 
            i=i+1; 
      elseif strcmpi(str(i:i+1),'ve')
            grapheme=[grapheme;{'v'}]; 
            i=i+1;
      elseif strcmpi(str(i:i+1),'wh')
            grapheme=[grapheme;{'w'}]; 
            i=i+1;
       elseif strcmpi(str(i:i+1),'zz')
            grapheme=[grapheme;{'z'}]; 
            i=i+1;
       elseif strcmpi(str(i:i+1),'ch')
            grapheme=[grapheme;{'ch'}]; 
            i=i+1;
       elseif strcmpi(str(i:i+1),'th')
            grapheme=[grapheme;{'th'}]; 
            i=i+1;
        elseif strcmpi(str(i:i+1),'oy')
            grapheme=[grapheme;{'oy'}]; 
            i=i+1;
        elseif strcmpi(str(i:i+1),'ai')
            grapheme=[grapheme;{'ai'}]; 
            i=i+1;        
        elseif strcmpi(str(i:i+1),'ou')
            grapheme=[grapheme;{'ou'}]; 
            i=i+1;
        elseif strcmpi(str(i:i+1),'ew')
            grapheme=[grapheme;{'ew'}]; 
            i=i+1;
        elseif strcmpi(str(i:i+1),'ie')
            grapheme=[grapheme;{'ie'}]; 
            i=i+1;
   else

     grapheme=[grapheme;{str(i)}];
     %end
   end
   i=i+1;
   if i==l
       f=1;
   end
end
end
if f==1
   grapheme=[grapheme;{str(end)}]; 
end
display(grapheme);

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM