I'm trying to use IBM Watson's WebSocket interface so I can obtain the word timings of the audio. Here is a link to the documentation. The authentication part seems to be working fine. But I keep getting the error "Expected a JSON header message before receiving binary data." after sending my JSON message in the OnOpen method. I got the WebSocket-Sharp library from NuGet (called websocket-sharp-core).
How do I solve this problem?
using WebSocketSharp;
using Newtonsoft.Json.Linq;
using System.Net.Http;
using System.Net;
using System.Net.Http.Headers;
/// <summary>
/// Obtains an IAM access token with the API key, then opens a WebSocket to the
/// text-to-speech synthesize endpoint and wires up the event handlers.
/// </summary>
/// <param name="text">Text forwarded to <c>OnOpen</c> once the socket is open.</param>
/// <param name="dest">Not used by this method body.</param>
async void GenVoice(string text, string dest)
{
    Console.WriteLine("obtaining access token...");
    var dict = new Dictionary<string, string>();
    dict.Add("grant_type", "urn:ibm:params:oauth:grant-type:apikey");
    dict.Add("apikey", "<my_api_key>");

    string accessToken;
    // Dispose the HTTP client once the token has been read; it is not needed afterwards.
    using (var authClient = new HttpClient())
    {
        authClient.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json"));
        var response = await authClient.PostAsync("https://iam.cloud.ibm.com/identity/token", new FormUrlEncodedContent(dict));
        var auth = JObject.Parse(await response.Content.ReadAsStringAsync());
        var tokenField = auth["access_token"];
        if (tokenField == null)
        {
            // Fail with a descriptive error instead of a NullReferenceException.
            throw new InvalidOperationException("Token response did not contain an access_token field: " + auth);
        }
        accessToken = tokenField.ToString();
    }
    Console.WriteLine("access token is " + accessToken);

    var client = new WebSocket($"wss://stream.watsonplatform.net/text-to-speech/api/v1/synthesize?access_token={accessToken}&voice=en-US_MichaelV3Voice");

    // Subscribe BEFORE connecting: Connect() is synchronous, so handlers attached
    // afterwards would miss the open event. The lambda defers the OnOpen call until
    // the event fires; the original `+= OnOpen(client, text)` invoked it immediately
    // and subscribed its null return value.
    client.OnOpen += (sender, e) => OnOpen(sender, text);
    client.OnMessage += OnMessage;
    client.OnError += (sender, e) =>
    {
        Console.WriteLine(e.Message);
    };
    client.OnClose += (sender, e) =>
    {
        Console.WriteLine($"closed; code={e.Code}; reason={e.Reason}; wasclean={e.WasClean}");
    };

    client.Connect();
}
/// <summary>
/// Builds the JSON request for the synthesize endpoint and sends it over the socket.
/// </summary>
/// <param name="sender">The <c>WebSocket</c> to send on; cast from <c>object</c>.</param>
/// <param name="text">Value placed in the request's "text" field.</param>
/// <returns>Always <c>null</c>; the return type is kept for existing callers.</returns>
System.EventHandler OnOpen(object sender, string text)
{
    var client = (WebSocket)sender;
    var message = new JObject();
    message.Add("content-type", JToken.FromObject("application/json"));
    message.Add("accept", JToken.FromObject("*/*"));
    message.Add("text", JToken.FromObject(text));
    message.Add("timings", JToken.FromObject(new string[] { "words" }));

    // Send(string) transmits a TEXT frame. The previous Send(byte[]) produced a
    // BINARY frame, which the server rejected with
    // "Expected a JSON header message before receiving binary data."
    client.Send(message.ToString());
    Console.WriteLine("successfully opened socket");
    return null;
}
/// <summary>
/// Logs each incoming WebSocket message according to the frame kind reported
/// by the event arguments (text, binary, or ping).
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Message event arguments inspected for kind and text payload.</param>
void OnMessage(object sender, MessageEventArgs e)
{
    // Each flag is tested independently, in the same order as before.
    if (e.IsText)
    {
        var line = string.Concat("got a message of type string, it says: ", e.Data);
        Console.WriteLine(line);
    }

    if (e.IsBinary)
    {
        Console.WriteLine("got a message of type binary");
    }

    if (e.IsPing)
    {
        Console.WriteLine("got a message of type ping");
    }
}
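I've found a solution. I initially thought the problem lay within the WebSocketSharp library itself, so I switched over to the System.Net.WebSockets dll and it's working now. (The likely root cause is that WebSocketSharp's Send(byte[]) sends a binary frame, whereas the service expects the JSON message as a text frame; the code below sends it with WebSocketMessageType.Text.) For anyone who needs some help, here's the final working code.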
using System.Net.WebSockets;
using Newtonsoft.Json.Linq;
// Connects to the synthesize endpoint, sends the JSON request as a text frame,
// then reads messages until the socket leaves the Open state.
var client = new ClientWebSocket();
var canc = System.Threading.CancellationToken.None;
await client.ConnectAsync(new Uri($"wss://stream.watsonplatform.net/text-to-speech/api/v1/synthesize?access_token={accessToken}&voice=en-US_MichaelV3Voice"), canc);
var message = new JObject();
message.Add("accept", JToken.FromObject("audio/wav"));
message.Add("text", JToken.FromObject(text));
message.Add("timings", JToken.FromObject(new string[] { "words" }));
await client.SendAsync(new ArraySegment<byte>(Encoding.UTF8.GetBytes(message.ToString())), WebSocketMessageType.Text, true, canc);
var toReturn = new List<Timing>();

// One receive buffer is reused for every read; `data` accumulates the fragments
// of the message currently being received.
var buffer = new byte[4096 * 20];
var data = new List<byte>();
while (client.State == WebSocketState.Open)
{
    var response = await client.ReceiveAsync(new ArraySegment<byte>(buffer), canc);
    if (response.MessageType == WebSocketMessageType.Close)
    {
        // Acknowledge the server's close frame and stop reading.
        await client.CloseOutputAsync(WebSocketCloseStatus.NormalClosure, string.Empty, canc);
        break;
    }

    // Copy only the bytes actually received; previously `data` stayed empty,
    // so the decoded string was always "".
    data.AddRange(new ArraySegment<byte>(buffer, 0, response.Count));
    if (!response.EndOfMessage)
    {
        // A message may span several reads; keep accumulating until it is complete.
        continue;
    }

    if (response.MessageType == WebSocketMessageType.Text)
    {
        Console.WriteLine("got a string, it says: " + Encoding.UTF8.GetString(data.ToArray()));
    }
    data.Clear();
}
The technical posts on this site are published under the CC BY-SA 4.0 license. If you reprint them, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.