简体   繁体   中英

Azure Topics worker role stops processing message after 60 seconds

We have a cloud service using a worker role to process messages it receives from a Topic set up on Azure Service Bus.

The message itself seems to arrive intact and is usually received and processed correctly. In some instances, however, the message seems to stop processing (logging abruptly ends and no more references to the message being processed are seen in our WadLogsTable). From my research, this might be happening because the worker role keeps its connection open and idle for longer than 60 seconds. How would I go about preventing these long-to-process messages from being abandoned?

The code for our worker role is below.

/// <summary>
/// Worker role that receives messages from the "AllMessages" subscription of the
/// "MyTopic" Service Bus topic and writes the call data they carry through its repositories.
/// </summary>
public class WorkerRole : RoleEntryPoint
{
    private static StandardKernel _kernel;
    // Signalled in OnStop. NOTE(review): nothing in this class ever waits on it.
    private readonly ManualResetEvent _completedEvent = new ManualResetEvent(false);
    // Not referenced in the visible code (may be used by the snipped sections).
    private BaseRepository<CallData> _callDataRepository;
    // NOTE(review): no assignment is visible in this class; presumably populated by the
    // snipped bindings in CreateKernel - confirm.
    private BaseRepository<CallLog> _callLogRepository;

    private SubscriptionClient _client;
    private NamespaceManager _nManager;
    private OnMessageOptions _options;
    // Not referenced in the visible code (may be used by the snipped sections).
    private BaseRepository<Site> _siteRepository;

    /// <summary>
    /// Registers the message callback on the subscription client and logs any exception
    /// thrown while doing so.
    /// </summary>
    public override void Run()
    {
        // NOTE(review): there is no blocking call after OnMessage(...) is registered, so this
        // method returns as soon as the callback is installed.
        try
        {
            // Declared outside the callback, so the same variable is captured by every invocation.
            List<CallInformation> callInfo;
            Trace.WriteLine("Starting processing of messages");

            // Initiates the message pump and callback is invoked for each message that is received, calling close on the client will stop the pump.

            _client.OnMessage(message =>
            {
                // Process message from subscription.
                Trace.TraceInformation("Call Received. Ready to process message ");
                // NOTE(review): OnStart creates the client with ReceiveMode.ReceiveAndDelete;
                // confirm that renewing a lock is valid in that mode.
                message.RenewLock();
                callInfo = message.GetBody<List<CallInformation>>();
                writeCallData(callInfo);


                Trace.TraceInformation("Call Processed. Clearing from topic.");
            }, _options);
        }
        catch (Exception e)
        {
            // Failures are written at Information level and not rethrown.
            Trace.TraceInformation("Error: " + e.Message + "---" + e.StackTrace);
        }
    }

    /// <summary>
    /// Writes every call in the list, followed by its datapoints, logging progress at each step.
    /// </summary>
    /// <param name="callList">Calls taken from the body of one received message.</param>
    private void writeCallData(List<CallInformation> callList)
    {
        // One try/catch wraps the whole list, so an exception on any call ends processing of
        // the remaining calls; the exception is logged and not rethrown.
        try
        {
            Trace.TraceInformation("Calls received: " + callList.Count);
            foreach (var callInfo in callList)
            {
                Trace.TraceInformation("Unwrapping call...");
                var call = callInfo.CallLog.Unwrap();
                Trace.TraceInformation("Begin Processing: Local Call " + call.ID + " with " + callInfo.DataPoints.Length + " datapoints");
                Trace.TraceInformation("Inserting Call...");
                _callLogRepository.ExecuteSqlCommand(/*SNIP: Insert call*/);
                    Trace.TraceInformation("Call entry written. Now building datapoint list...");
                    // ToList() unwraps every datapoint before the insert loop starts.
                    var datapoints = callInfo.DataPoints.Select(datapoint => datapoint.Unwrap()).ToList();
                    Trace.TraceInformation("datapoint list constructed. Processing datapoints...");
                    foreach (var data in datapoints)
                    {
                        /*SNIP: Long running code. Insert our datapoints one at a time. Sometimes our messages die in the middle of this foreach. */
                    }
                    Trace.TraceInformation("All datapoints written for call with dependable ID " + call.Call_ID);
                Trace.TraceInformation("Call Processed successfully.");
            }
        }
        catch (Exception e)
        {
            // Only the exception message is logged here (no stack trace).
            Trace.TraceInformation("Call Processing Failed. " + e.Message);
        }
    }

    /// <summary>
    /// Creates the topic and subscription when they do not exist yet, builds the subscription
    /// client and the message-pump options, and creates the kernel.
    /// </summary>
    /// <returns>The result of <c>base.OnStart()</c>, including when setup threw.</returns>
    public override bool OnStart()
    {
        try
        {
            var connectionString = CloudConfigurationManager.GetSetting("Microsoft.ServiceBus.ConnectionString");
            _nManager = NamespaceManager.CreateFromConnectionString(connectionString);
            // TimeSpan(days, hours, minutes, seconds): 10 minutes.
            _nManager.Settings.OperationTimeout = new TimeSpan(0,0,10,0);
            // The description is only used when the topic has to be created below.
            var topic = new TopicDescription("MyTopic")
            {
                DuplicateDetectionHistoryTimeWindow = new TimeSpan(0, 0, 10, 0),
                DefaultMessageTimeToLive = new TimeSpan(0, 0, 10, 0),
                RequiresDuplicateDetection = true,
            };
            if (!_nManager.TopicExists("MyTopic"))
            {
                _nManager.CreateTopic(topic);
            }
            if (!_nManager.SubscriptionExists("MyTopic", "AllMessages"))
            {
                _nManager.CreateSubscription("MyTopic", "AllMessages");
            }
            _client = SubscriptionClient.CreateFromConnectionString(connectionString, "MyTopic", "AllMessages",
                ReceiveMode.ReceiveAndDelete);
            // NOTE(review): the client above uses ReceiveAndDelete; confirm that AutoRenewTimeout
            // has any effect in that mode.
            _options = new OnMessageOptions
            {
                    AutoRenewTimeout = TimeSpan.FromMinutes(5),

            };
            _options.ExceptionReceived += LogErrors;
            CreateKernel();

            _callLogRepository.ExecuteSqlCommand(/*SNIP: Background processing*/);
        }
        catch (Exception e)
        {
            // Setup failures are logged at Information level; start-up continues regardless.
            Trace.TraceInformation("Error on roleStart:" + e.Message + "---" + e.StackTrace);
        }
        return base.OnStart();
    }

    /// <summary>
    /// Closes the subscription client and signals the completed event.
    /// </summary>
    public override void OnStop()
    {
        // Close the connection to the Service Bus subscription
        _client.Close();
        _completedEvent.Set();
    }

    /// <summary>
    /// Handler for <c>OnMessageOptions.ExceptionReceived</c>: logs the exception and closes the client.
    /// </summary>
    void LogErrors(object sender, ExceptionReceivedEventArgs e)
    {
        if (e.Exception != null)
        {
            Trace.TraceInformation("Error: " + e.Exception.Message + "---" + e.Exception.StackTrace);
            // Per the comment in Run, closing the client stops the message pump, so any
            // reported exception ends message processing.
            _client.Close();
        }
    }

    /// <summary>
    /// Creates the kernel, stores it in the static field and returns it.
    /// </summary>
    public IKernel CreateKernel()
    {
        _kernel = new StandardKernel();
        /*SNIP: Bind NInjectable repositories */
        return _kernel;
    }
}

Your Run method does not go on indefinitely. It should look like this:

/// <summary>
/// Role entry point. Logs that it was called and then loops forever; any exception
/// that escapes the loop is logged.
/// </summary>
public override void Run()
{
    try
    {
        Trace.WriteLine("WorkerRole entrypoint called", "Information");

        // Deliberately endless: this method is meant to keep running.
        for (;;)
        {
            // Add code here that runs in the role instance
        }
    }
    catch (Exception runFailure)
    {
        string details = runFailure.ToString();
        Trace.WriteLine("Exception during Run: " + details);
        // Take other action as needed.
    }
}

Taken from the docs:

The Run is considered the Main method for your application. Overriding the Run method is not required; the default implementation never returns. If you do override the Run method, your code should block indefinitely. If the Run method returns, the role is automatically recycled by raising the Stopping event and calling the OnStop method so that your shutdown sequences may be executed before the role is taken offline.

TheDude's response is very close to the correct answer! It turns out he's right that the Run method needs to stay alive instead of returning immediately. With Azure Service Bus's message pump mechanism, though, you can't place the _client.OnMessage(...) call inside a while loop, as this results in an error (The message pump has already been initialized).

What actually needs to happen is that a manual reset event is created before the worker role begins executing, and then waited on after the message pump code is executed. For documentation on ManualResetEvent, see https://msdn.microsoft.com/en-us/library/system.threading.manualresetevent(v=vs.110).aspx . Additionally, the process is described here: http://www.acousticguitar.pro/questions/607359/using-queueclient-onmessage-in-an-azure-worker-role

My final worker role class looks like this:

/// <summary>
/// Worker role that pumps messages from the "AllMessages" subscription of the "MyTopic"
/// Service Bus topic and writes each received call, with its datapoints, through the
/// call-log repository. <see cref="Run"/> blocks until <see cref="OnStop"/> signals it.
/// </summary>
public class WorkerRole : RoleEntryPoint
{
    private const string TopicName = "MyTopic";
    private const string SubscriptionName = "AllMessages";

    private static StandardKernel _kernel;

    // Signalled by OnStop; Run blocks on it so the role stays alive while the pump runs.
    private readonly ManualResetEvent _completedEvent = new ManualResetEvent(false);

    // NOTE(review): no assignment is visible in this class; presumably populated by the
    // snipped bindings in CreateKernel - confirm.
    private BaseRepository<CallLog> _callLogRepository;

    private SubscriptionClient _client;
    // Not referenced in the visible code (may be used by the snipped sections).
    private MessagingFactory _mFact;
    private NamespaceManager _nManager;
    private OnMessageOptions _options;

    /// <summary>
    /// Starts the message pump and then blocks until <see cref="OnStop"/> signals the
    /// completed event.
    /// </summary>
    public override void Run()
    {
        try
        {
            // Initiates the message pump; the callback is invoked for each message that is
            // received. Calling Close on the client stops the pump. OnMessage must be
            // registered only once, so it is not placed in a loop.
            _client.OnMessage(message =>
            {
                // Process message from subscription.
                Trace.TraceInformation("Call Received. Ready to process message " + message.MessageId);

                // Local to the callback so invocations never share state through a captured variable.
                var callInfo = message.GetBody<CallInformation>();
                WriteCallData(callInfo);

                Trace.TraceInformation("Call Processed. Clearing from topic.");
            }, _options);
        }
        catch (Exception e)
        {
            Trace.TraceError("Error: " + e.Message + "---" + e.StackTrace);
        }

        // Wait on the same event that OnStop sets; waiting on a separate, method-local event
        // would leave this method blocked with nothing able to release it.
        _completedEvent.WaitOne();
    }

    /// <summary>
    /// Writes one call and all of its datapoints, logging progress at each step.
    /// Exceptions are logged and not rethrown.
    /// </summary>
    /// <param name="callInfo">The call taken from the body of a received message.</param>
    private void WriteCallData(CallInformation callInfo)
    {
        try
        {
            Trace.TraceInformation("Unwrapping call...");
            var call = callInfo.CallLog.Unwrap();
            Trace.TraceInformation("Begin Processing: Local Call " + call.ID + " with " + callInfo.DataPoints.Length + " datapoints");

            Trace.TraceInformation("Inserting Call...");
            _callLogRepository.ExecuteSqlCommand(/*SNIP: Insert call*/);

            Trace.TraceInformation("Call entry written. Now building datapoint list...");
            // ToList() unwraps every datapoint before the insert loop starts.
            var datapoints = callInfo.DataPoints.Select(datapoint => datapoint.Unwrap()).ToList();

            Trace.TraceInformation("datapoint list constructed. Processing datapoints...");
            foreach (var data in datapoints)
            {
                /*SNIP: Long running code. Insert our datapoints one at a time. */
            }

            Trace.TraceInformation("All datapoints written for call with dependable ID " + call.Call_ID);
            Trace.TraceInformation("Call Processed successfully.");
        }
        catch (Exception e)
        {
            Trace.TraceError("Call Processing Failed. " + e.Message);
        }
    }

    /// <summary>
    /// Creates the topic and subscription when they do not exist yet, builds the subscription
    /// client and the message-pump options, and creates the kernel.
    /// </summary>
    /// <returns>The result of <c>base.OnStart()</c>, including when setup threw.</returns>
    public override bool OnStart()
    {
        try
        {
            var connectionString = CloudConfigurationManager.GetSetting("Microsoft.ServiceBus.ConnectionString");
            _nManager = NamespaceManager.CreateFromConnectionString(connectionString);
            _nManager.Settings.OperationTimeout = TimeSpan.FromMinutes(10);

            if (!_nManager.TopicExists(TopicName))
            {
                // The description is only needed when the topic has to be created.
                var topic = new TopicDescription(TopicName)
                {
                    DuplicateDetectionHistoryTimeWindow = TimeSpan.FromMinutes(10),
                    DefaultMessageTimeToLive = TimeSpan.FromMinutes(10),
                    RequiresDuplicateDetection = true,
                };
                _nManager.CreateTopic(topic);
            }

            if (!_nManager.SubscriptionExists(TopicName, SubscriptionName))
            {
                _nManager.CreateSubscription(TopicName, SubscriptionName);
            }

            _client = SubscriptionClient.CreateFromConnectionString(
                connectionString, TopicName, SubscriptionName, ReceiveMode.ReceiveAndDelete);

            // NOTE(review): the client above uses ReceiveAndDelete; confirm that AutoRenewTimeout
            // has any effect in that mode.
            _options = new OnMessageOptions
            {
                AutoRenewTimeout = TimeSpan.FromMinutes(5),
            };
            _options.ExceptionReceived += LogErrors;
            CreateKernel();

            _callLogRepository.ExecuteSqlCommand(/*SNIP: Background processing*/);
        }
        catch (Exception e)
        {
            Trace.TraceError("Error on roleStart:" + e.Message + "---" + e.StackTrace);
        }

        return base.OnStart();
    }

    /// <summary>
    /// Closes the subscription client and releases <see cref="Run"/>.
    /// </summary>
    public override void OnStop()
    {
        try
        {
            // _client is still null when OnStart failed before creating it.
            if (_client != null)
            {
                // Close the connection to the Service Bus subscription; this stops the pump.
                _client.Close();
            }
        }
        finally
        {
            // Always release Run, even if closing the client throws.
            _completedEvent.Set();
        }
    }

    /// <summary>
    /// Handler for <c>OnMessageOptions.ExceptionReceived</c>: logs the exception and closes the client.
    /// </summary>
    /// <param name="sender">Event source (unused).</param>
    /// <param name="e">Event data carrying the exception.</param>
    void LogErrors(object sender, ExceptionReceivedEventArgs e)
    {
        if (e.Exception != null)
        {
            Trace.TraceError("Error: " + e.Exception.Message + "---" + e.Exception.StackTrace);
            // NOTE(review): closing the client stops the pump on every reported exception while
            // Run keeps blocking; confirm this is intended rather than letting the role recycle.
            _client.Close();
        }
    }

    /// <summary>
    /// Creates the kernel, stores it in the static field and returns it.
    /// </summary>
    public IKernel CreateKernel()
    {
        _kernel = new StandardKernel();
        /*SNIP: Bind NInjectable repositories */
        return _kernel;
    }
}

You'll notice the presence of the ManualResetEvent and the invocation of WaitOne() at the end of my Run method. I hope someone finds this helpful!

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM