Kea 3.2.0-git
ha_service.cc
Go to the documentation of this file.
1// Copyright (C) 2018-2026 Internet Systems Consortium, Inc. ("ISC")
2//
3// This Source Code Form is subject to the terms of the Mozilla Public
4// License, v. 2.0. If a copy of the MPL was not distributed with this
5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7#include <config.h>
8
9#include <command_creator.h>
10#include <ha_log.h>
11#include <ha_service.h>
12#include <ha_service_states.h>
14#include <cc/data.h>
16#include <config/timeouts.h>
17#include <dhcp/iface_mgr.h>
18#include <dhcpsrv/cfgmgr.h>
19#include <dhcpsrv/lease_mgr.h>
22#include <http/date_time.h>
23#include <http/response_json.h>
27#include <util/stopwatch.h>
28#include <boost/pointer_cast.hpp>
29#include <boost/make_shared.hpp>
30#include <boost/weak_ptr.hpp>
31#include <functional>
32#include <sstream>
33
34using namespace isc::asiolink;
35using namespace isc::config;
36using namespace isc::data;
37using namespace isc::dhcp;
38using namespace isc::hooks;
39using namespace isc::http;
40using namespace isc::log;
41using namespace isc::util;
42namespace ph = std::placeholders;
43
44namespace {
45
47class CommandUnsupportedError : public CtrlChannelError {
48public:
49 CommandUnsupportedError(const char* file, size_t line, const char* what) :
50 CtrlChannelError(file, line, what) {}
51};
52
54class ConflictError : public CtrlChannelError {
55public:
56 ConflictError(const char* file, size_t line, const char* what) :
57 CtrlChannelError(file, line, what) {}
58};
59
60}
61
62namespace isc {
63namespace ha {
64
75
// Constructs the HA service.
//
// Stores the identifier, IO service, network state and configuration, calls
// network_state_->enableService() for the local origin and creates the HTTP
// client. When multi-threading is disabled a client is created with the
// second constructor argument set to false; otherwise an MT-mode client is
// created using the configured number of client threads. In the MT case, if a
// dedicated listener is configured, a CmdHttpListener is created for the IP
// address and port taken from this server's URL, using the configured number
// of listener threads and this server's TLS context.
//
// Throws Unexpected when the host part of this server's URL cannot be
// converted to an IOAddress (hostname resolution is not supported here).
//
// NOTE(review): several lines of this definition are missing from this
// extract: the tail of the initializer list, the bodies of the server_type
// branches, the command filter setup and the opening of the final log
// statement. Confirm against the original file before relying on this text.
76HAService::HAService(const unsigned int id, const IOServicePtr& io_service,
77 const NetworkStatePtr& network_state, const HAConfigPtr& config,
78 const HAServerType& server_type)
79 : id_(id), io_service_(io_service), network_state_(network_state), config_(config),
81 query_filter_(config), lease_sync_filter_(server_type, config), mutex_(),
82 pending_requests_(), lease_update_backlog_(config->getDelayedUpdatesLimit()),
84
85 if (server_type == HAServerType::DHCPv4) {
87
88 } else {
90 }
91
92 network_state_->enableService(getLocalOrigin());
93
95
96 // Create the client and/or listener as appropriate.
97 if (!config_->getEnableMultiThreading()) {
98 // Not configured for multi-threading, start a client in ST mode.
99 client_.reset(new HttpClient(io_service_, false));
100 } else {
101 // Create an MT-mode client.
102 client_.reset(new HttpClient(io_service_, true,
103 config_->getHttpClientThreads(), true));
104
105 // If we're configured to use our own listener create and start it.
106 if (config_->getHttpDedicatedListener()) {
107 // Get the server address and port from this server's URL.
108 auto my_url = config_->getThisServerConfig()->getUrl();
109 IOAddress server_address(IOAddress::IPV4_ZERO_ADDRESS());
110 try {
111 // Since we do not currently support hostname resolution,
112 // we need to make sure we have an IP address here.
113 server_address = IOAddress(my_url.getStrippedHostname());
114 } catch (const std::exception& ex) {
// The caught exception is not propagated; an Unexpected error naming
// the offending host string is raised instead.
115 isc_throw(Unexpected, "server Url:" << my_url.getStrippedHostname()
116 << " is not a valid IP address");
117 }
118
119 // Fetch how many threads the listener will use.
120 uint32_t listener_threads = config_->getHttpListenerThreads();
121
122 // Fetch the TLS context.
123 auto tls_context = config_->getThisServerConfig()->getTlsContext();
124
125 // Instantiate the listener.
126 listener_.reset(new CmdHttpListener(server_address, my_url.getPort(),
127 listener_threads, tls_context));
128 // Set the command filter when enabled.
129 if (config_->getRestrictCommands()) {
130 if (server_type == HAServerType::DHCPv4) {
133 } else {
136 }
137 }
138 }
139 }
140
142 .arg(config_->getThisServerName())
143 .arg(HAConfig::HAModeToString(config->getHAMode()))
144 .arg(HAConfig::PeerConfig::roleToString(config->getThisServerConfig()->getRole()));
145}
146
// NOTE(review): the signature line and the statement that stops the
// client/listener are missing from this extract. Given its position right
// after the constructor this is presumably HAService::~HAService() - confirm.
// The only surviving statement calls network_state_->enableService() for the
// local origin.
148 // Stop client and/or listener.
150
151 network_state_->enableService(getLocalOrigin());
152}
153
154std::string
155HAService::getCSCallbacksSetName() const {
156 std::ostringstream s;
157 s << "HA_MT_" << id_;
158 return (s.str());
159}
160
// Registers the HA-specific events, associating each event constant with a
// label equal to the constant's name.
//
// NOTE(review): the signature line and at least one statement preceding the
// defineEvent() calls are missing from this extract; presumably this is
// HAService::defineEvents() - confirm against the original file.
161void
164
165 defineEvent(HA_HEARTBEAT_COMPLETE_EVT, "HA_HEARTBEAT_COMPLETE_EVT");
166 defineEvent(HA_LEASE_UPDATES_COMPLETE_EVT, "HA_LEASE_UPDATES_COMPLETE_EVT");
167 defineEvent(HA_SYNCING_FAILED_EVT, "HA_SYNCING_FAILED_EVT");
168 defineEvent(HA_SYNCING_SUCCEEDED_EVT, "HA_SYNCING_SUCCEEDED_EVT");
169 defineEvent(HA_MAINTENANCE_NOTIFY_EVT, "HA_MAINTENANCE_NOTIFY_EVT");
170 defineEvent(HA_MAINTENANCE_START_EVT, "HA_MAINTENANCE_START_EVT");
171 defineEvent(HA_MAINTENANCE_CANCEL_EVT, "HA_MAINTENANCE_CANCEL_EVT");
172 defineEvent(HA_SYNCED_PARTNER_UNAVAILABLE_EVT, "HA_SYNCED_PARTNER_UNAVAILABLE_EVT");
173}
174
175void
178
187}
188
// Supplies, for each HA state, a handler bound to this HAService instance
// together with the pausing setting read from that state's entry in the
// state machine configuration. The hot-standby and load-balancing states
// both use normalStateHandler; every other state has a dedicated handler.
//
// NOTE(review): the signature line and the opening line of every
// registration call are missing from this extract; presumably this is
// HAService::defineStates() and each pair of arguments below belongs to a
// defineState(...) call - confirm against the original file.
189void
192
194 std::bind(&HAService::backupStateHandler, this),
195 config_->getStateMachineConfig()->getStateConfig(HA_BACKUP_ST)->getPausing());
196
198 std::bind(&HAService::communicationRecoveryHandler, this),
199 config_->getStateMachineConfig()->getStateConfig(HA_COMMUNICATION_RECOVERY_ST)->getPausing());
200
202 std::bind(&HAService::normalStateHandler, this),
203 config_->getStateMachineConfig()->getStateConfig(HA_HOT_STANDBY_ST)->getPausing());
204
206 std::bind(&HAService::normalStateHandler, this),
207 config_->getStateMachineConfig()->getStateConfig(HA_LOAD_BALANCING_ST)->getPausing());
208
210 std::bind(&HAService::inMaintenanceStateHandler, this),
211 config_->getStateMachineConfig()->getStateConfig(HA_IN_MAINTENANCE_ST)->getPausing());
212
214 std::bind(&HAService::partnerDownStateHandler, this),
215 config_->getStateMachineConfig()->getStateConfig(HA_PARTNER_DOWN_ST)->getPausing());
216
218 std::bind(&HAService::partnerInMaintenanceStateHandler, this),
219 config_->getStateMachineConfig()->getStateConfig(HA_PARTNER_IN_MAINTENANCE_ST)->getPausing());
220
222 std::bind(&HAService::passiveBackupStateHandler, this),
223 config_->getStateMachineConfig()->getStateConfig(HA_PASSIVE_BACKUP_ST)->getPausing());
224
226 std::bind(&HAService::readyStateHandler, this),
227 config_->getStateMachineConfig()->getStateConfig(HA_READY_ST)->getPausing());
228
230 std::bind(&HAService::syncingStateHandler, this),
231 config_->getStateMachineConfig()->getStateConfig(HA_SYNCING_ST)->getPausing());
232
234 std::bind(&HAService::terminatedStateHandler, this),
235 config_->getStateMachineConfig()->getStateConfig(HA_TERMINATED_ST)->getPausing());
236
238 std::bind(&HAService::waitingStateHandler, this),
239 config_->getStateMachineConfig()->getStateConfig(HA_WAITING_ST)->getPausing());
240}
241
// Handler for the backup state.
//
// On entry to the state the query filter is told to serve no scopes. Per the
// comments below, the server takes no further action in this state and only
// receives lease updates from its partners.
//
// NOTE(review): some statements (the line numbering has gaps) are missing
// from this extract.
242void
243HAService::backupStateHandler() {
244 if (doOnEntry()) {
245 query_filter_.serveNoScopes();
247
248 // Log if the state machine is paused.
250 }
251
252 // There is nothing to do in that state. This server simply receives
253 // lease updates from the partners.
255}
256
// Handler for the communication-recovery state.
//
// On entry the default scopes are served. The handler then evaluates, in
// order, a first condition whose text is missing from this extract,
// shouldTerminate() and isPartnerStateInvalid(); when none of them applies it
// acts on the partner's state:
//  - for the partner in waiting/syncing/ready, the decision depends on
//    whether communication is no longer interrupted and the lease update
//    backlog was overflown;
//  - in the default case the server stays put while communication is
//    interrupted; once it is resumed and the partner is in the normal or
//    communication-recovery state, an overflown backlog or a failure of
//    sendLeaseUpdatesFromBacklog() causes sendHAReset() to be called.
// On exit from the state the lease update backlog is cleared.
//
// NOTE(review): the statements performing the actual transitions, several
// case labels and the log statements are missing from this extract.
257void
258HAService::communicationRecoveryHandler() {
259 if (doOnEntry()) {
260 query_filter_.serveDefaultScopes();
262
263 // Log if the state machine is paused.
265 }
266
268
271
272 // Check if the clock skew is still acceptable. If not, transition to
273 // the terminated state.
274 } else if (shouldTerminate()) {
276
277 } else if (isPartnerStateInvalid()) {
279
280 } else {
281
282 // Transitions based on the partner's state.
283 switch (communication_state_->getPartnerState()) {
286 break;
287
290 break;
291
294 break;
295
296 case HA_TERMINATED_ST:
298 break;
299
301 if (shouldPartnerDown()) {
303
304 } else {
306 }
307 break;
308
309 case HA_WAITING_ST:
310 case HA_SYNCING_ST:
311 case HA_READY_ST:
312 // The partner seems to be waking up, perhaps after communication-recovery.
313 // If our backlog queue is overflown we need to synchronize our lease database.
314 // There is no need to send ha-reset to the partner because the partner is
315 // already synchronizing its lease database.
316 if (!communication_state_->isCommunicationInterrupted() &&
317 lease_update_backlog_.wasOverflown()) {
319 } else {
320 // Backlog was not overflown, so there is no need to synchronize our
321 // lease database. Let's wait until our partner completes synchronization
322 // and transitions to the load-balancing state.
324 }
325 break;
326
327 default:
328 // If the communication is still interrupted, let's continue sitting
329 // in this state until it is resumed or until the transition to the
330 // partner-down state, depending on what happens first.
331 if (communication_state_->isCommunicationInterrupted()) {
333 break;
334 }
335
336 // The communication has been resumed. The partner server must be in a state
337 // in which it can receive outstanding lease updates we collected. The number of
338 // outstanding lease updates must not exceed the configured limit. Finally, the
339 // lease updates must be successfully sent. If that all works, we will transition
340 // to the normal operation.
341 if ((communication_state_->getPartnerState() == getNormalState()) ||
342 (communication_state_->getPartnerState() == HA_COMMUNICATION_RECOVERY_ST)) {
343 if (lease_update_backlog_.wasOverflown() || !sendLeaseUpdatesFromBacklog()) {
344 // If our lease backlog was overflown or we were unable to send lease
345 // updates to the partner we should notify the partner that it should
346 // synchronize the lease database. We do it by sending ha-reset command.
347 if (sendHAReset()) {
349 }
350 break;
351 }
352 // The backlog was not overflown and we successfully sent our lease updates.
353 // We can now transition to the normal operation state. If the partner
354 // fails to send his outstanding lease updates to us it should send the
355 // ha-reset command to us.
357 break;
358 }
359
360 // The partner appears to be in unexpected state, we have exceeded the number
361 // of lease updates in a backlog or an attempt to send lease updates failed.
362 // In all these cases we follow plan B and transition to the waiting state.
363 // The server will then attempt to synchronize the entire lease database.
365 }
366 }
367
368 // When exiting this state we must ensure that lease updates backlog is cleared.
369 if (doOnExit()) {
370 lease_update_backlog_.clear();
371 }
372}
373
// Handler shared by the states of normal operation (defineStates binds it to
// both the hot-standby and the load-balancing state).
//
// On entry the default scopes are served. The handler returns early when a
// first condition (its text is missing from this extract) holds, when
// shouldTerminate() is true or when isPartnerStateInvalid() is true.
// Otherwise it acts on the partner's state; in one branch the outcome
// depends on shouldPartnerDown() and, failing that, on
// config_->amAllowingCommRecovery(). The trailing doOnExit() call exists
// only for its side effect of clearing the "on exit" flag.
//
// NOTE(review): the statements performing the transitions, several case
// labels and the log statements are missing from this extract.
374void
375HAService::normalStateHandler() {
376 // If we are transitioning from another state, we have to define new
377 // serving scopes appropriate for the new state. We don't do it if
378 // we remain in this state.
379 if (doOnEntry()) {
380 query_filter_.serveDefaultScopes();
382
383 // Log if the state machine is paused.
385 }
386
388
391 return;
392 }
393
394 // Check if the clock skew is still acceptable. If not, transition to
395 // the terminated state.
396 if (shouldTerminate()) {
398 return;
399 }
400
401 // Check if the partner state is valid per current configuration. If it is
402 // in an invalid state let's transition to the waiting state and stay there
403 // until the configuration is corrected.
404 if (isPartnerStateInvalid()) {
406 return;
407 }
408
409 switch (communication_state_->getPartnerState()) {
412 break;
413
416 break;
417
420 break;
421
422 case HA_TERMINATED_ST:
424 break;
425
427 if (shouldPartnerDown()) {
429
430 } else if (config_->amAllowingCommRecovery()) {
432
433 } else {
435 }
436 break;
437
438 default:
440 }
441
442 if (doOnExit()) {
443 // Do nothing here but doOnExit() call clears the "on exit" flag
444 // when transitioning to the communication-recovery state. In that
445 // state we need this flag to be cleared.
446 }
447}
448
// Handler for the in-maintenance state.
//
// On entry the query filter is told to serve no scopes. Per the comments
// below, the server does not leave this state on its own; it waits for an
// administrative command cancelling the maintenance.
//
// NOTE(review): the log statements and some other statements (the line
// numbering has gaps) are missing from this extract.
449void
450HAService::inMaintenanceStateHandler() {
451 // If we are transitioning from another state, we have to define new
452 // serving scopes appropriate for the new state. We don't do it if
453 // we remain in this state.
454 if (doOnEntry()) {
455 // In this state the server remains silent and waits for being
456 // shutdown.
457 query_filter_.serveNoScopes();
459
460 // Log if the state machine is paused.
462
464 .arg(config_->getThisServerName());
465 }
466
468
469 // We don't transition out of this state unless explicitly mandated
470 // by the administrator via a dedicated command which cancels
471 // the maintenance.
473}
474
// Handler for the partner-down state.
//
// On entry: if the last event was HA_MAINTENANCE_START_EVT or auto-failover
// is enabled for this server, the failover scopes are served; otherwise only
// the default scopes are served. The rejected lease updates are cleared.
// Afterwards the handler returns early when a first condition (its text is
// missing from this extract) holds, when shouldTerminate() is true or when
// isPartnerStateInvalid() is true. Otherwise it acts on the partner's state;
// for a partner in the ready state the decision depends on
// communication_state_->hasPartnerNewUnsentUpdates().
//
// NOTE(review): the statements performing the transitions, several case
// labels, one condition and the log statements are missing from this
// extract.
475void
476HAService::partnerDownStateHandler() {
477 // If we are transitioning from another state, we have to define new
478 // serving scopes appropriate for the new state. We don't do it if
479 // we remain in this state.
480 if (doOnEntry()) {
481
482 bool maintenance = (getLastEvent() == HA_MAINTENANCE_START_EVT);
483
484 // It may be administratively disabled to handle partner's scope
485 // in case of failure. If this is the case we'll just handle our
486 // default scope (or no scope at all). The user will need to
487 // manually enable this server to handle partner's scope.
488 // If we're in the maintenance mode we serve all scopes because
489 // it is not a failover situation.
490 if (maintenance || config_->getThisServerConfig()->isAutoFailover()) {
491 query_filter_.serveFailoverScopes();
492 } else {
493 query_filter_.serveDefaultScopes();
494 }
496 communication_state_->clearRejectedLeaseUpdates();
497
498 // Log if the state machine is paused.
500
501 if (maintenance) {
502 // If we ended up in the partner-down state as a result of
503 // receiving the ha-maintenance-start command let's log it.
505 .arg(config_->getThisServerName());
506 }
507
509 // Partner sent the ha-sync-complete-notify command to indicate that
510 // it has successfully synchronized its lease database but this server
511 // was unable to send heartbeat to this server. Enable the DHCP service
512 // and continue serving the clients in the partner-down state until the
513 // communication with the partner is fixed.
515 }
516
518
521 return;
522 }
523
524 // Check if the clock skew is still acceptable. If not, transition to
525 // the terminated state.
526 if (shouldTerminate()) {
528 return;
529 }
530
531 // Check if the partner state is valid per current configuration. If it is
532 // in an invalid state let's transition to the waiting state and stay there
533 // until the configuration is corrected.
534 if (isPartnerStateInvalid()) {
536 return;
537 }
538
539 switch (communication_state_->getPartnerState()) {
544 break;
545
546 case HA_READY_ST:
547 // If partner allocated new leases for which it didn't send lease updates
548 // to us we should synchronize our database.
549 if (communication_state_->hasPartnerNewUnsentUpdates()) {
551 } else {
552 // We did not miss any lease updates. There is no need to synchronize
553 // the database.
555 }
556 break;
557
558 case HA_TERMINATED_ST:
560 break;
561
562 default:
564 }
565}
566
// Handler for the partner-in-maintenance state.
//
// On entry the failover scopes are served. The handler returns early when
// the state model is paused or when shouldTerminate() is true; otherwise it
// acts on the partner's state.
//
// NOTE(review): the statements performing the transitions, a case label and
// the log statements are missing from this extract.
567void
568HAService::partnerInMaintenanceStateHandler() {
569 // If we are transitioning from another state, we have to define new
570 // serving scopes appropriate for the new state. We don't do it if
571 // we remain in this state.
572 if (doOnEntry()) {
573 query_filter_.serveFailoverScopes();
574
576
577 // Log if the state machine is paused.
579
581 .arg(config_->getThisServerName());
582 }
583
585
586 if (isModelPaused()) {
588 return;
589 }
590
591 // Check if the clock skew is still acceptable. If not, transition to
592 // the terminated state.
593 if (shouldTerminate()) {
595 return;
596 }
597
598 switch (communication_state_->getPartnerState()) {
601 break;
602 default:
604 }
605}
606
// Handler for the passive-backup state.
//
// On entry the default scopes are served and the heartbeat is stopped,
// because no heartbeat is sent in this state.
//
// NOTE(review): some statements (the line numbering has gaps) are missing
// from this extract.
607void
608HAService::passiveBackupStateHandler() {
609 // If we are transitioning from another state, we have to define new
610 // serving scopes appropriate for the new state. We don't do it if
611 // we remain in this state.
612 if (doOnEntry()) {
613 query_filter_.serveDefaultScopes();
615
616 // In the passive-backup state we don't send heartbeat.
617 communication_state_->stopHeartbeat();
618
619 // Log if the state machine is paused.
621 }
623}
624
// Handler for the ready state.
//
// On entry no scopes are served and the rejected lease updates are cleared.
// The handler returns early when a first condition (its text is missing from
// this extract) holds, when shouldTerminate() is true or when
// isPartnerStateInvalid() is true. Otherwise it acts on the partner's state;
// when the partner is also ready, the branch taken depends on whether this
// server has the primary role, and in another branch on shouldPartnerDown().
//
// NOTE(review): the statements performing the transitions, several case
// labels and the log statements are missing from this extract.
625void
626HAService::readyStateHandler() {
627 // If we are transitioning from another state, we have to define new
628 // serving scopes appropriate for the new state. We don't do it if
629 // we remain in this state.
630 if (doOnEntry()) {
631 query_filter_.serveNoScopes();
633 communication_state_->clearRejectedLeaseUpdates();
634
635 // Log if the state machine is paused.
637 }
638
640
643 return;
644 }
645
646 // Check if the clock skew is still acceptable. If not, transition to
647 // the terminated state.
648 if (shouldTerminate()) {
650 return;
651 }
652
653 // Check if the partner state is valid per current configuration. If it is
654 // in an invalid state let's transition to the waiting state and stay there
655 // until the configuration is corrected.
656 if (isPartnerStateInvalid()) {
658 return;
659 }
660
661 switch (communication_state_->getPartnerState()) {
666 break;
667
670 break;
671
674 break;
675
676 case HA_READY_ST:
677 // If both servers are ready, the primary server "wins" and is
678 // transitioned first.
679 if (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::PRIMARY) {
682 } else {
684 }
685 break;
686
687 case HA_TERMINATED_ST:
689 break;
690
692 if (shouldPartnerDown()) {
694
695 } else {
697 }
698 break;
699
700 default:
702 }
703}
704
// Handler for the syncing state.
//
// On entry no scopes are served and the rejected lease updates are cleared.
// The handler returns early when a first condition (its text is missing from
// this extract) holds, when shouldTerminate() is true or when
// isPartnerStateInvalid() is true. Otherwise it acts on the partner's state.
// In the default case the heartbeat is stopped, the configured sync timeout
// (milliseconds) is converted to whole seconds with a minimum of one second,
// and synchronize() is called with the failover peer configuration; the
// follow-up action depends on whether it returned CONTROL_RESULT_SUCCESS.
//
// NOTE(review): the statements performing the transitions, a case label, the
// heartbeat re-enabling statement and the log statements are missing from
// this extract.
705void
706HAService::syncingStateHandler() {
707 // If we are transitioning from another state, we have to define new
708 // serving scopes appropriate for the new state. We don't do it if
709 // we remain in this state.
710 if (doOnEntry()) {
711 query_filter_.serveNoScopes();
713 communication_state_->clearRejectedLeaseUpdates();
714
715 // Log if the state machine is paused.
717 }
718
721 return;
722 }
723
724 // Check if the clock skew is still acceptable. If not, transition to
725 // the terminated state.
726 if (shouldTerminate()) {
728 return;
729 }
730
731 // Check if the partner state is valid per current configuration. If it is
732 // in an invalid state let's transition to the waiting state and stay there
733 // until the configuration is corrected.
734 if (isPartnerStateInvalid()) {
736 return;
737 }
738
739 // We don't want to perform synchronous attempt to synchronize with
740 // a partner until we know that the partner is responding. Therefore,
741 // we wait for the heartbeat to complete successfully before we
742 // initiate the synchronization.
743 switch (communication_state_->getPartnerState()) {
744 case HA_TERMINATED_ST:
746 return;
747
749 // If the partner appears to be offline, let's transition to the partner
750 // down state. Otherwise, we'd be stuck trying to synchronize with a
751 // dead partner.
752 if (shouldPartnerDown()) {
754
755 } else {
757 }
758 break;
759
760 default:
761 // We don't want the heartbeat to interfere with the synchronization,
762 // so let's temporarily stop it.
763 communication_state_->stopHeartbeat();
764
765 // Timeout is configured in milliseconds. Need to convert to seconds.
766 unsigned int dhcp_disable_timeout =
767 static_cast<unsigned int>(config_->getSyncTimeout() / 1000);
// A sub-second timeout would truncate to zero; bump it to one second.
768 if (dhcp_disable_timeout == 0) {
769 ++dhcp_disable_timeout;
770 }
771
772 // Perform synchronous leases update.
773 std::string status_message;
774 int sync_status = synchronize(status_message,
775 config_->getFailoverPeerConfig(),
776 dhcp_disable_timeout);
777
778 // If the leases synchronization was successful, let's transition
779 // to the ready state.
780 if (sync_status == CONTROL_RESULT_SUCCESS) {
782
783 } else {
784 // If the synchronization was unsuccessful we're back to the
785 // situation that the partner is unavailable and therefore
786 // we stay in the syncing state.
788 }
789 }
790
791 // Make sure that the heartbeat is re-enabled.
793}
794
// Handler for the terminated state.
//
// On entry the default scopes are served, the rejected lease updates are
// cleared and the heartbeat is stopped, because no heartbeat is sent in this
// state.
//
// NOTE(review): the log statements and some other statements (the line
// numbering has gaps) are missing from this extract.
795void
796HAService::terminatedStateHandler() {
797 // If we are transitioning from another state, we have to define new
798 // serving scopes appropriate for the new state. We don't do it if
799 // we remain in this state.
800 if (doOnEntry()) {
801 query_filter_.serveDefaultScopes();
803 communication_state_->clearRejectedLeaseUpdates();
804
805 // In the terminated state we don't send heartbeat.
806 communication_state_->stopHeartbeat();
807
808 // Log if the state machine is paused.
810
812 .arg(config_->getThisServerName());
813 }
814
816}
817
// Handler for the waiting state.
//
// On entry no scopes are served and the rejected lease updates are cleared.
// The handler then returns early, in this order, when: a condition whose
// text is missing from this extract holds; this server has the backup role;
// the HA mode is passive-backup; shouldTerminate() is true; or
// isPartnerStateInvalid() is true. Otherwise it acts on the partner's state:
//  - in the branch ending with the HA_READY_ST label the server moves to
//    HA_SYNCING_ST, or directly to HA_READY_ST when lease syncing is
//    disabled in the configuration;
//  - for a terminated partner the branch taken depends on how long the
//    partner has been in that state, compared in minutes against
//    HA_WAITING_TO_TERMINATED_ST_DELAY_MINUTES;
//  - when the partner is also waiting, only the primary server moves on (to
//    syncing or ready, as above).
//
// NOTE(review): several case labels, transition statements, the heartbeat
// scheduling statement and the log statements are missing from this extract.
818void
819HAService::waitingStateHandler() {
820 // If we are transitioning from another state, we have to define new
821 // serving scopes appropriate for the new state. We don't do it if
822 // we remain in this state.
823 if (doOnEntry()) {
824 query_filter_.serveNoScopes();
826 communication_state_->clearRejectedLeaseUpdates();
827
828 // Log if the state machine is paused.
830 }
831
832 // Only schedule the heartbeat for non-backup servers.
833 if ((config_->getHAMode() != HAConfig::PASSIVE_BACKUP) &&
834 (config_->getThisServerConfig()->getRole() != HAConfig::PeerConfig::BACKUP)) {
836 }
837
840 return;
841 }
842
843 // Backup server must remain in its own state.
844 if (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::BACKUP) {
846 return;
847 }
848
849 // We're not a backup server, so we're either primary or secondary. If this is
850 // a passive-backup mode of operation, we're primary and we should transition
851 // to the passive-backup state.
852 if (config_->getHAMode() == HAConfig::PASSIVE_BACKUP) {
854 return;
855 }
856
857 // Check if the clock skew is still acceptable. If not, transition to
858 // the terminated state.
859 if (shouldTerminate()) {
861 return;
862 }
863
864 // Check if the partner state is valid per current configuration. If it is
865 // in an invalid state let's sit in the waiting state until the configuration
866 // is corrected.
867 if (isPartnerStateInvalid()) {
869 return;
870 }
871
872 switch (communication_state_->getPartnerState()) {
879 case HA_READY_ST:
880 // If we're configured to not synchronize lease database, proceed directly
881 // to the "ready" state.
882 verboseTransition(config_->amSyncingLeases() ? HA_SYNCING_ST : HA_READY_ST);
883 break;
884
885 case HA_SYNCING_ST:
887 break;
888
889 case HA_TERMINATED_ST: {
890 auto partner_in_terminated = communication_state_->getDurationSincePartnerStateTime();
891 if (!partner_in_terminated.is_not_a_date_time() &&
892 (partner_in_terminated.total_seconds()) / 60 >= HA_WAITING_TO_TERMINATED_ST_DELAY_MINUTES) {
894 .arg(config_->getThisServerName())
897 break;
898 }
899
900 // We have checked above whether the clock skew is exceeding the threshold
901 // and we should terminate. If we're here, it means that the clock skew
902 // is acceptable. The partner may be still in the terminated state because
903 // it hasn't been restarted yet. Probably, this server is the first one
904 // being restarted after syncing the clocks. Let's just sit in the waiting
905 // state until the partner gets restarted.
907 .arg(config_->getThisServerName());
909 break;
910 }
911 case HA_WAITING_ST:
912 // If both servers are waiting, the primary server 'wins' and is
913 // transitioned to the next state first.
914 if (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::PRIMARY) {
915 // If we're configured to not synchronize lease database, proceed directly
916 // to the "ready" state.
917 verboseTransition(config_->amSyncingLeases() ? HA_SYNCING_ST : HA_READY_ST);
918
919 } else {
921 }
922 break;
923
925 if (shouldPartnerDown()) {
927
928 } else {
930 }
931 break;
932
933 default:
935 }
936}
937
// Transitions the state model to the given state, logging around it.
//
// The labels of the current and the new state are upper-cased for the log.
// Unless the HA mode is passive-backup, the partner's state label is
// obtained as well. The transition itself is transition(state,
// getNextEvent()). Afterwards, for servers that are neither in
// passive-backup mode nor in the backup role, one of three branches is taken
// depending on shouldSendLeaseUpdates() for the failover peer and on
// config_->amSendingLeaseUpdates().
//
// Parameter state: identifier of the state to transition to.
//
// NOTE(review): the opening line of every log statement is missing from this
// extract; only the .arg(...) continuations survive.
938void
939HAService::verboseTransition(const unsigned state) {
940 // Get current and new state name.
941 std::string current_state_name = getStateLabel(getCurrState());
942 std::string new_state_name = getStateLabel(state);
943
944 // Turn them to upper case so as they are better visible in the logs.
945 boost::to_upper(current_state_name);
946 boost::to_upper(new_state_name);
947
948 if (config_->getHAMode() != HAConfig::PASSIVE_BACKUP) {
949 // If this is load-balancing or hot-standby mode we also want to log
950 // partner's state.
951 auto partner_state = communication_state_->getPartnerState();
952 std::string partner_state_name = getStateLabel(partner_state);
953 boost::to_upper(partner_state_name);
954
955 // Log the transition.
957 .arg(config_->getThisServerName())
958 .arg(current_state_name)
959 .arg(new_state_name)
960 .arg(partner_state_name);
961
962 } else {
963 // In the passive-backup mode we don't know the partner's state.
965 .arg(config_->getThisServerName())
966 .arg(current_state_name)
967 .arg(new_state_name);
968 }
969
970 // If we're transitioning directly from the "waiting" to "ready"
971 // state it indicates that the database synchronization is
972 // administratively disabled. Let's remind the user about this
973 // configuration setting.
974 if ((state == HA_READY_ST) && (getCurrState() == HA_WAITING_ST)) {
976 .arg(config_->getThisServerName());
977 }
978
979 // Do the actual transition.
980 transition(state, getNextEvent());
981
982 // Inform the administrator whether or not lease updates are generated.
983 // Updates are never generated by a backup server so it doesn't make
984 // sense to log anything for the backup server.
985 if ((config_->getHAMode() != HAConfig::PASSIVE_BACKUP) &&
986 (config_->getThisServerConfig()->getRole() != HAConfig::PeerConfig::BACKUP)) {
987 if (shouldSendLeaseUpdates(config_->getFailoverPeerConfig())) {
989 .arg(config_->getThisServerName())
990 .arg(new_state_name);
991
992 } else if (!config_->amSendingLeaseUpdates()) {
993 // Lease updates are administratively disabled.
995 .arg(config_->getThisServerName())
996 .arg(new_state_name);
997
998 } else {
999 // Lease updates are not administratively disabled, but they
1000 // are not issued because this is the backup server or because
1001 // in this state the server should not generate lease updates.
1003 .arg(config_->getThisServerName())
1004 .arg(new_state_name);
1005 }
1006 }
1007}
1008
// Returns a state identifier chosen from the configuration: HA_BACKUP_ST
// when this server has the backup role, otherwise a state selected by the
// configured HA mode, with HA_PASSIVE_BACKUP_ST as the default.
//
// NOTE(review): the signature line and two case labels are missing from this
// extract; presumably this is HAService::getNormalState() const with the
// load-balancing and hot-standby modes as the missing labels - confirm.
1009int
1011 if (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::BACKUP) {
1012 return (HA_BACKUP_ST);
1013 }
1014
1015 switch (config_->getHAMode()) {
1017 return (HA_LOAD_BALANCING_ST);
1019 return (HA_HOT_STANDBY_ST);
1020 default:
1021 return (HA_PASSIVE_BACKUP_ST);
1022 }
1023}
1024
// Unpauses the state model if it is currently paused.
//
// Returns true when the model was paused and unpauseModel() has been called,
// false when the model was not paused.
//
// NOTE(review): the signature line and the opening of the log statement are
// missing from this extract; presumably this is HAService::unpause() -
// confirm.
1025bool
1027 if (isModelPaused()) {
1029 .arg(config_->getThisServerName());
1030 unpauseModel();
1031 return (true);
1032 }
1033 return (false);
1034}
1035
// When the state model is paused, emits a log message carrying this server's
// name and the upper-cased name of the current state. Does nothing when the
// model is not paused.
//
// NOTE(review): the signature line and the opening of the log statement are
// missing from this extract; presumably this is
// HAService::conditionalLogPausedState() const - confirm.
1036void
1038 // Inform the administrator if the state machine is paused.
1039 if (isModelPaused()) {
1040 std::string state_name = stateToString(getCurrState());
1041 boost::to_upper(state_name);
1043 .arg(config_->getThisServerName())
1044 .arg(state_name);
1045 }
1046}
1047
// Delegates to query_filter_.serveDefaultScopes().
// NOTE(review): the signature line is missing from this extract; presumably
// HAService::serveDefaultScopes() - confirm.
1048void
1050 query_filter_.serveDefaultScopes();
1051}
1052
// Delegates to query_filter_.serveFailoverScopes().
// NOTE(review): the signature line is missing from this extract; presumably
// HAService::serveFailoverScopes() - confirm.
1053void
1055 query_filter_.serveFailoverScopes();
1056}
1057
// Forwards query4 to inScopeInternal() and returns its result.
// NOTE(review): the signature line is missing from this extract; presumably
// the DHCPv4 overload of HAService::inScope() - confirm.
1058bool
1060 return (inScopeInternal(query4));
1061}
1062
// Forwards query6 to inScopeInternal() and returns its result.
// NOTE(review): the signature line is missing from this extract; presumably
// the DHCPv6 overload of HAService::inScope() - confirm.
1063bool
1065 return (inScopeInternal(query6));
1066}
1067
1068template<typename QueryPtrType>
1069bool
1070HAService::inScopeInternal(QueryPtrType& query) {
1071 // Check if the query is in scope (should be processed by this server).
1072 std::string scope_class;
1073 const bool in_scope = query_filter_.inScope(query, scope_class);
1074 // Whether or not the query is going to be processed by this server,
1075 // we associate the query with the appropriate class.
1076 query->addClass(dhcp::ClientClass(scope_class));
1077 // The following is the part of the server failure detection algorithm.
1078 // If the query should be processed by the partner we need to check if
1079 // the partner responds. If the number of unanswered queries exceeds a
1080 // configured threshold, we will consider the partner to be offline.
1081 if (!in_scope && communication_state_->isCommunicationInterrupted()) {
1082 communication_state_->analyzeMessage(query);
1083 }
1084 // Indicate if the query is in scope.
1085 return (in_scope);
1086}
1087
// Forwards lease4 to shouldReclaimInternal() and returns its result.
// NOTE(review): the signature line is missing from this extract; presumably
// the DHCPv4 overload of HAService::shouldReclaim() const - confirm.
1088bool
1090 return (shouldReclaimInternal(lease4));
1091}
1092
// Forwards lease6 to shouldReclaimInternal() and returns its result.
// NOTE(review): the signature line is missing from this extract; presumably
// the DHCPv6 overload of HAService::shouldReclaim() const - confirm.
1093bool
1095 return (shouldReclaimInternal(lease6));
1096}
1097
1098template<typename LeasePtrType>
1099bool
1100HAService::shouldReclaimInternal(const LeasePtrType& lease) const {
1101 return (getCurrState() != HA_TERMINATED_ST || query_filter_.inScope(lease));
1102}
1103
// Brings the DHCP service status in line with what the current state
// requires: when the service should not be enabled but is, it is disabled
// for the local origin; when it should be enabled but is not, it is enabled
// for the local origin. In both cases a log message is built from this
// server's name and the upper-cased label of the current state.
//
// NOTE(review): the signature line, most of the should_enable expression and
// the opening of both log statements are missing from this extract;
// presumably this is HAService::adjustNetworkState() - confirm.
1104void
1106 std::string current_state_name = getStateLabel(getCurrState());
1107 boost::to_upper(current_state_name);
1108
1109 // DHCP service should be enabled in the following states.
1110 const bool should_enable = ((getCurrState() == HA_COMMUNICATION_RECOVERY_ST) ||
1117
1118 if (!should_enable && network_state_->isServiceEnabled()) {
1119 current_state_name = getStateLabel(getCurrState());
1120 boost::to_upper(current_state_name);
1122 .arg(config_->getThisServerName())
1123 .arg(current_state_name);
1124 network_state_->disableService(getLocalOrigin());
1125
1126 } else if (should_enable && !network_state_->isServiceEnabled()) {
1127 current_state_name = getStateLabel(getCurrState());
1128 boost::to_upper(current_state_name);
1130 .arg(config_->getThisServerName())
1131 .arg(current_state_name);
1132 network_state_->enableService(getLocalOrigin());
1133 }
1134}
1135
// Tells whether the server should consider its partner to be down.
//
// Returns false while the communication with the partner is not interrupted.
// When it is interrupted and the DHCP service is enabled, a load-balancing
// server or a standby server defers to
// communication_state_->failureDetected(); in every other interrupted case
// the result is true.
//
// NOTE(review): the signature line is missing from this extract; presumably
// this is HAService::shouldPartnerDown() const - confirm.
1136bool
1138 // Checking whether the communication with the partner is OK is the
1139 // first step towards verifying if the server is up.
1140 if (communication_state_->isCommunicationInterrupted()) {
1141 // If the communication is interrupted, we also have to check
1142 // whether the partner answers DHCP requests. The only cases
1143 // when we don't (can't) do it are: the hot standby configuration
1144 // in which this server is a primary and when the DHCP service is
1145 // disabled so we can't analyze incoming traffic. Note that the
1146 // primary server can't check delayed responses to the partner
1147 // because the partner doesn't respond to any queries in this
1148 // configuration.
1149 if (network_state_->isServiceEnabled() &&
1150 ((config_->getHAMode() == HAConfig::LOAD_BALANCING) ||
1151 (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::STANDBY))) {
1152 return (communication_state_->failureDetected());
1153 }
1154
1155 // Hot standby / primary case.
1156 return (true);
1157 }
1158
1159 // Shouldn't transition to the partner down state.
1160 return (false);
1161}
1162
// Tells whether the server should terminate.
//
// Returns true when communication_state_->clockSkewShouldTerminate() is
// true. Otherwise clockSkewShouldWarn() is invoked (its result is not used
// here) and the result is
// communication_state_->rejectedLeaseUpdatesShouldTerminate().
//
// NOTE(review): the signature line is missing from this extract; presumably
// this is HAService::shouldTerminate() const - confirm.
1163bool
1165 // Check if skew is fatally large.
1166 bool should_terminate = communication_state_->clockSkewShouldTerminate();
1167
1168 // If not issue a warning if it's getting large.
1169 if (!should_terminate) {
1170 communication_state_->clockSkewShouldWarn();
1171 // Check if we should terminate because the number of rejected leases
1172 // has been exceeded.
1173 should_terminate = communication_state_->rejectedLeaseUpdatesShouldTerminate();
1174 }
1175
1176 return (should_terminate);
1177}
1178
1179bool
1183
// Checks the partner's state against the locally configured HA mode.
//
// Returns true (after emitting a log message with this server's name) when
// the partner's state does not fit the configured mode: a partner in the
// hot-standby state while this server is not in hot-standby mode, or a
// partner in one of two states (their case labels are missing from this
// extract) while this server is not in load-balancing mode. Returns false
// otherwise.
//
// NOTE(review): the signature line, two case labels and the opening of the
// log statements are missing from this extract; presumably this is
// HAService::isPartnerStateInvalid() const - confirm.
1184bool
1186 switch (communication_state_->getPartnerState()) {
1188 if (config_->getHAMode() != HAConfig::LOAD_BALANCING) {
1190 .arg(config_->getThisServerName());
1191 return (true);
1192 }
1193 break;
1194
1195 case HA_HOT_STANDBY_ST:
1196 if (config_->getHAMode() != HAConfig::HOT_STANDBY) {
1198 .arg(config_->getThisServerName());
1199 return (true);
1200 }
1201 break;
1202
1204 if (config_->getHAMode() != HAConfig::LOAD_BALANCING) {
1206 .arg(config_->getThisServerName());
1207 return (true);
1208 }
1209 break;
1210
1211 default:
1212 ;
1213 }
1214 return (false);
1215}
1216
// Processes DHCPv4 lease updates for every configured server other than this
// one and returns the number of peers that were counted.
//
// For each peer:
//  - if shouldQueueLeaseUpdates() is true, the deleted and the new/updated
//    leases are walked in the queueing branch and the peer is skipped;
//  - else if shouldSendLeaseUpdates() is false, the unsent update counter in
//    the communication state is increased for non-backup peers and the peer
//    is skipped;
//  - otherwise the deleted leases and then the new/updated leases are walked
//    in the sending branch. For deleted leases the branch taken depends on
//    whether the lease is in Lease4::STATE_RELEASED.
// The peer is counted unless it is a backup server and
// config_->amWaitingBackupAck() is false.
//
// Parameters visible here: leases (new allocations and updated leases),
// deleted_leases, and parking_lot, which is passed to the sending calls.
//
// NOTE(review): the first signature line and the statements that queue or
// send each update are missing from this extract; presumably this is the
// DHCPv4 overload of HAService::asyncSendLeaseUpdates() - confirm.
1217size_t
1219 const dhcp::Lease4CollectionPtr& leases,
1220 const dhcp::Lease4CollectionPtr& deleted_leases,
1221 const hooks::ParkingLotHandlePtr& parking_lot) {
1222
1223 // Get configurations of the peers. Exclude this instance.
1224 HAConfig::PeerConfigMap peers_configs = config_->getOtherServersConfig();
1225
1226 size_t sent_num = 0;
1227
1228 // Schedule sending lease updates to each peer.
1229 for (auto const& p : peers_configs) {
1230 HAConfig::PeerConfigPtr conf = p.second;
1231
1232 // Check if the lease updates should be queued. This is the case when the
1233 // server is in the communication-recovery state. Queued lease updates may
1234 // be sent when the communication is re-established.
1235 if (shouldQueueLeaseUpdates(conf)) {
1236 // Lease updates for deleted leases.
1237 for (auto const& l : *deleted_leases) {
1238 // If a released lease is preserved in the database send the lease
1239 // update to the partner. Otherwise, delete the lease.
1240 if (l->state_ == Lease4::STATE_RELEASED) {
1242 } else {
1244 }
1245 }
1246
1247 // Lease updates for new allocations and updated leases.
1248 for (auto const& l : *leases) {
1250 }
1251
1252 continue;
1253 }
1254
1255 // Check if the lease update should be sent to the server. If we're in
1256 // the partner-down state we don't send lease updates to the partner.
1257 if (!shouldSendLeaseUpdates(conf)) {
1258 // If we decide to not send the lease updates to an active partner, we
1259 // should make a record of it in the communication state. The partner
1260 // can check if there were any unsent lease updates when he determines
1261 // whether it should synchronize its database or not when it recovers
1262 // from the partner-down state.
1263 if (conf->getRole() != HAConfig::PeerConfig::BACKUP) {
1264 communication_state_->increaseUnsentUpdateCount();
1265 }
1266 continue;
1267 }
1268
1269 // Lease updates for deleted leases.
1270 for (auto const& l : *deleted_leases) {
1271 // If a released lease is preserved in the database send the lease
1272 // update to the partner. Otherwise, delete the lease.
1273 if (l->state_ == Lease4::STATE_RELEASED) {
1275 parking_lot);
1276 } else {
1278 parking_lot);
1279 }
1280 }
1281
1282 // Lease updates for new allocations and updated leases.
1283 for (auto const& l : *leases) {
1285 parking_lot);
1286 }
1287
1288 // If we're contacting a backup server from which we don't expect a
1289 // response prior to responding to the DHCP client we don't count
1290 // it.
1291 if ((config_->amWaitingBackupAck() || (conf->getRole() != HAConfig::PeerConfig::BACKUP))) {
1292 ++sent_num;
1293 }
1294 }
1295
1296 return (sent_num);
1297}
1298
// Convenience wrapper for a single IPv4 lease: the lease is appended to a
// lease collection and asyncSendLeaseUpdates() is called with that
// collection and an empty collection of deleted leases. The value returned
// by asyncSendLeaseUpdates() is passed back to the caller.
// NOTE(review): the signature line and the declaration of `leases` are
// absent from this listing - confirm against the original file.
1299size_t
1301 const dhcp::Lease4Ptr& lease,
1302 const hooks::ParkingLotHandlePtr& parking_lot) {
1304 leases->push_back(lease);
1305 Lease4CollectionPtr deleted_leases(new Lease4Collection());
1306
1307 return (asyncSendLeaseUpdates(query, leases, deleted_leases, parking_lot));
1308}
1309
// Schedules IPv6 lease updates to every peer returned by
// config_->getOtherServersConfig() and returns the number of peers whose
// acknowledgment is counted (sent_num).
//
// The per-peer decisions are: queue when shouldQueueLeaseUpdates() is true,
// skip (increasing the unsent update counter for a non-backup peer) when
// shouldSendLeaseUpdates() is false, and send otherwise. When sending, the
// new/updated and the deleted leases travel together in a single command
// built by CommandCreator::createLease6BulkApply().
//
// NOTE(review): the signature line and the statements that queue the
// individual updates are absent from this listing - confirm against the
// original file.
1310size_t
1312 const dhcp::Lease6CollectionPtr& leases,
1313 const dhcp::Lease6CollectionPtr& deleted_leases,
1314 const hooks::ParkingLotHandlePtr& parking_lot) {
1315
1316 // Get configurations of the peers. Exclude this instance.
1317 HAConfig::PeerConfigMap peers_configs = config_->getOtherServersConfig();
1318
1319 size_t sent_num = 0;
1320
1321 // Schedule sending lease updates to each peer.
1322 for (auto const& p : peers_configs) {
1323 HAConfig::PeerConfigPtr conf = p.second;
1324
1325 // Check if the lease updates should be queued. This is the case when the
1326 // server is in the communication-recovery state. Queued lease updates may
1327 // be sent when the communication is re-established.
1328 if (shouldQueueLeaseUpdates(conf)) {
1329 for (auto const& l : *deleted_leases) {
1330 // If a released lease is preserved in the database send the lease
1331 // update to the partner. Otherwise, delete the lease.
1332 if (l->state_ == Lease4::STATE_RELEASED) {
1334 } else {
1336 }
1337 }
1338
1339 // Lease updates for new allocations and updated leases.
1340 for (auto const& l : *leases) {
1342 }
1343
1344 continue;
1345 }
1346
1347 // Check if the lease update should be sent to the server. If we're in
1348 // the partner-down state we don't send lease updates to the partner.
1349 if (!shouldSendLeaseUpdates(conf)) {
1350 // If we decide to not send the lease updates to an active partner, we
1351 // should make a record of it in the communication state. The partner
1352 // can check if there were any unsent lease updates when it determines
1353 // whether it should synchronize its database or not when it recovers
1354 // from the partner-down state.
1355 if (conf->getRole() != HAConfig::PeerConfig::BACKUP) {
1356 communication_state_->increaseUnsentUpdateCount();
1357 }
1358 continue;
1359 }
1360
1361 // If we're contacting a backup server from which we don't expect a
1362 // response prior to responding to the DHCP client we don't count
1363 // it.
1364 if (config_->amWaitingBackupAck() || (conf->getRole() != HAConfig::PeerConfig::BACKUP)) {
1365 ++sent_num;
1366 }
1367
1368 // Send new/updated leases and deleted leases in one command.
1369 asyncSendLeaseUpdate(query, conf, CommandCreator::createLease6BulkApply(leases, deleted_leases),
1370 parking_lot);
1371 }
1372
1373 return (sent_num);
1374}
1375
// Entry point that delegates to leaseUpdateCompleteInternal() and returns
// its result. When MultiThreadingMgr reports that multi-threading mode is
// on, mutex_ is held for the duration of the call; otherwise the internal
// function is called without locking.
// NOTE(review): the signature line is absent from this listing - confirm
// against the original file.
1376 template<typename QueryPtrType>
1377bool
1379 const ParkingLotHandlePtr& parking_lot) {
1380 if (MultiThreadingMgr::instance().getMode()) {
1381 std::lock_guard<std::mutex> lock(mutex_);
1382 return (leaseUpdateCompleteInternal(query, parking_lot));
1383 } else {
1384 return (leaseUpdateCompleteInternal(query, parking_lot));
1385 }
1386}
1387
1388template<typename QueryPtrType>
1389bool
1390HAService::leaseUpdateCompleteInternal(QueryPtrType& query,
1391 const ParkingLotHandlePtr& parking_lot) {
1392 auto it = pending_requests_.find(query);
1393
1394 // If there are no more pending requests for this query, let's unpark
1395 // the DHCP packet.
1396 if (it == pending_requests_.end() || (--pending_requests_[query] <= 0)) {
1397 if (parking_lot) {
1398 parking_lot->unpark(query);
1399 }
1400
1401 // If we have unparked the packet we can clear pending requests for
1402 // this query.
1403 if (it != pending_requests_.end()) {
1404 pending_requests_.erase(it);
1405 }
1406 return (true);
1407 }
1408 return (false);
1409}
1410
// Entry point that delegates to updatePendingRequestInternal(). When
// MultiThreadingMgr reports that multi-threading mode is on, mutex_ is held
// for the duration of the call; otherwise the internal function is called
// without locking.
// NOTE(review): the signature line is absent from this listing - confirm
// against the original file.
1411 template<typename QueryPtrType>
1412void
1414 if (MultiThreadingMgr::instance().getMode()) {
1415 std::lock_guard<std::mutex> lock(mutex_);
1416 updatePendingRequestInternal(query);
1417 } else {
1418 updatePendingRequestInternal(query);
1419 }
1420}
1421
1422template<typename QueryPtrType>
1423void
1424HAService::updatePendingRequestInternal(QueryPtrType& query) {
1425 if (pending_requests_.count(query) == 0) {
1426 pending_requests_[query] = 1;
1427 } else {
1428 ++pending_requests_[query];
1429 }
1430}
1431
// Sends a single lease update command to one peer and handles the response.
//
// An HTTP POST request with `command` as the JSON body is built for the peer
// described by `config` and scheduled through client_. Only a weak pointer
// to the query is captured by the completion handler. The handler:
// - logs communication/parsing failures and marks the update unsuccessful;
// - on ConflictError reports a rejected lease update to communication_state_
//   and marks the update as an unsuccessful conflict;
// - on any other exception marks the update unsuccessful;
// - for a non-backup peer, marks the partner unavailable on a non-conflict
//   failure, or reports a successful lease update on success;
// - when the peer's ack is awaited, drops the parked packet on failure and
//   calls leaseUpdateComplete(), running the state machine with
//   HA_LEASE_UPDATES_COMPLETE_EVT if that returns true;
// - returns early for a backup peer whose acks are not awaited.
// After scheduling the request the pending request counter for the query is
// increased when the peer's ack is awaited.
//
// NOTE(review): the `config` parameter line, part of the request
// construction, two log statement openings and one argument of
// asyncSendRequest() are absent from this listing - confirm against the
// original file.
1432 template<typename QueryPtrType>
1433void
1434HAService::asyncSendLeaseUpdate(const QueryPtrType& query,
1436 const ConstElementPtr& command,
1437 const ParkingLotHandlePtr& parking_lot) {
1438 // Create HTTP/1.1 request including our command.
1439 PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
1441 HostHttpHeader(config->getUrl().getStrippedHostname()));
1442 config->addBasicAuthHttpHeader(request);
1443 request->setBodyAsJson(command);
1444 request->finalize();
1445
1446 // Response object should also be created because the HTTP client needs
1447 // to know the type of the expected response.
1448 HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
1449
1450 // When possible we prefer to pass weak pointers to the queries, rather
1451 // than shared pointers, to avoid memory leaks in case of cross reference
1452 // between the pointers.
1453 boost::weak_ptr<typename QueryPtrType::element_type> weak_query(query);
1454
1455 // Schedule asynchronous HTTP request.
1456 client_->asyncSendRequest(config->getUrl(), config->getTlsContext(),
1457 request, response,
1458 [this, weak_query, parking_lot, config]
1459 (const boost::system::error_code& ec,
1460 const HttpResponsePtr& http_response,
1461 const std::string& error_str) {
1462 // Get the shared pointer of the query. The server should keep the
1463 // pointer to the query and then park it. Therefore, we don't really
1464 // expect it to be null. If it is null, something is really wrong.
1465 QueryPtrType query_ptr = weak_query.lock();
1466 if (!query_ptr) {
1467 isc_throw(Unexpected, "query is null while receiving response from"
1468 " HA peer. This is programmatic error");
1469 }
1470
1471 // There are four possible groups of errors during the lease update.
1472 // One is the IO error causing issues in communication with the peer.
1473 // Another one is an HTTP parsing error. The third type occurs when
1474 // the partner receives the command but it is invalid or there is
1475 // an internal processing error. Finally, the fourth type is when the
1476 // conflict status code is returned in the response indicating that
1477 // the lease update does not match the partner's configuration.
1478
1479 bool lease_update_success = true;
1480 bool lease_update_conflict = false;
1481
1482 // Handle first two groups of errors.
1483 if (ec || !error_str.empty()) {
1484 LOG_WARN(ha_logger, HA_LEASE_UPDATE_COMMUNICATIONS_FAILED)
1485 .arg(config_->getThisServerName())
1486 .arg(query_ptr->getLabel())
1487 .arg(config->getLogLabel())
1488 .arg(ec ? ec.message() : error_str);
1489
1490 // Communication error, so let's drop parked packet. The DHCP
1491 // response will not be sent.
1492 lease_update_success = false;
1493
1494 } else {
1495
1496 try {
1497 int rcode = 0;
1498 auto args = verifyAsyncResponse(http_response, rcode);
1499 // In the v6 case the server may return a list of failed lease
1500 // updates and we should log them.
1501 logFailedLeaseUpdates(query_ptr, args);
1502
1503 } catch (const ConflictError& ex) {
1504 // Handle fourth group of errors.
1505 lease_update_conflict = true;
1506 lease_update_success = false;
1507 communication_state_->reportRejectedLeaseUpdate(query_ptr);
1508
1510 .arg(config_->getThisServerName())
1511 .arg(query_ptr->getLabel())
1512 .arg(config->getLogLabel())
1513 .arg(ex.what());
1514
1515 } catch (const std::exception& ex) {
1516 // Handle third group of errors.
1518 .arg(config_->getThisServerName())
1519 .arg(query_ptr->getLabel())
1520 .arg(config->getLogLabel())
1521 .arg(ex.what());
1522
1523 // Error while doing an update. The DHCP response will not be sent.
1524 lease_update_success = false;
1525 }
1526 }
1527
1528 // We don't care about the result of the lease update to the backup server.
1529 // It is a best effort update.
1530 if (config->getRole() != HAConfig::PeerConfig::BACKUP) {
1531 // If the lease update was unsuccessful we may need to set the partner
1532 // state as unavailable.
1533 if (!lease_update_success) {
1534 // Do not set it as unavailable if it was a conflict because the
1535 // partner actually responded.
1536 if (!lease_update_conflict) {
1537 // If we were unable to communicate with the partner we set partner's
1538 // state as unavailable.
1539 communication_state_->setPartnerUnavailable();
1540 }
1541 } else {
1542 // Lease update successful and we may need to clear some previously
1543 // rejected lease updates.
1544 communication_state_->reportSuccessfulLeaseUpdate(query_ptr);
1545 }
1546 }
1547
1548 // It is possible to configure the server to not wait for a response from
1549 // the backup server before we unpark the packet and respond to the client.
1550 // Here we check if we're dealing with such situation.
1551 if (config_->amWaitingBackupAck() || (config->getRole() != HAConfig::PeerConfig::BACKUP)) {
1552 // We're expecting a response from the backup server or it is not
1553 // a backup server and the lease update was unsuccessful. In such
1554 // case the DHCP exchange fails.
1555 if (!lease_update_success) {
1556 if (parking_lot) {
1557 parking_lot->drop(query_ptr);
1558 }
1559 }
1560 } else {
1561 // This was a response from the backup server and we're configured to
1562 // not wait for their acknowledgments, so there is nothing more to do.
1563 return;
1564 }
1565
1566 if (leaseUpdateComplete(query_ptr, parking_lot)) {
1567 // If we have finished sending the lease updates we need to run the
1568 // state machine until the state machine finds that additional events
1569 // are required, such as next heartbeat or a lease update. The runModel()
1570 // may transition to another state, schedule asynchronous tasks etc.
1571 // Then it returns control to the DHCP server.
1572 runModel(HA_LEASE_UPDATES_COMPLETE_EVT);
1573 }
1574 },
1576 std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
1577 std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
1578 std::bind(&HAService::clientCloseHandler, this, ph::_1)
1579 );
1580
1581 // The number of pending requests is the number of requests for which we
1582 // expect an acknowledgment prior to responding to the DHCP clients. If
1583 // we're configured to wait for the acks from the backups or it is not
1584 // a backup increase the number of pending requests.
1585 if (config_->amWaitingBackupAck() || (config->getRole() != HAConfig::PeerConfig::BACKUP)) {
1586 // Request scheduled, so update the request counters for the query.
1587 updatePendingRequest(query);
1588 }
1589}
1590
// Decides whether lease updates should be sent to the given peer. The
// checks are applied in this order:
// - false when sending lease updates is disabled in config_;
// - true when the peer is a backup server;
// - false when this server itself is a backup server;
// - otherwise true only for the states listed in the switch on the current
//   state, false for any other state.
// NOTE(review): the signature line and two case labels of the switch are
// absent from this listing - confirm the full list of states against the
// original file.
1591bool
1593 // Never send lease updates if they are administratively disabled.
1594 if (!config_->amSendingLeaseUpdates()) {
1595 return (false);
1596 }
1597
1598 // Always send updates to the backup server.
1599 if (peer_config->getRole() == HAConfig::PeerConfig::BACKUP) {
1600 return (true);
1601 }
1602
1603 // Never send updates if this is a backup server.
1604 if (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::BACKUP) {
1605 return (false);
1606 }
1607
1608 // In other case, whether we send lease updates or not depends on our
1609 // state.
1610 switch (getCurrState()) {
1611 case HA_HOT_STANDBY_ST:
1614 return (true);
1615
1616 default:
1617 ;
1618 }
1619
1620 return (false);
1621}
1622
// Decides whether lease updates for the given peer should be queued rather
// than sent. The visible checks return false when sending lease updates is
// disabled in config_ and when the peer is a backup server.
// NOTE(review): the signature line and the final return statement are
// absent from this listing, so the condition under which true is returned
// cannot be seen here - confirm against the original file.
1623bool
1625 if (!config_->amSendingLeaseUpdates()) {
1626 return (false);
1627 }
1628
1629 if (peer_config->getRole() == HAConfig::PeerConfig::BACKUP) {
1630 return (false);
1631 }
1632
1634}
1635
// Logs the lease updates which the peer reported as failed.
//
// `args` is expected to be a map; when it is null or of another type the
// function returns without logging. The lists stored under
// "failed-deleted-leases" and "failed-leases" are walked and one INFO
// message is logged for every list element that is a map, using its
// "type", "ip-address" and "error-message" string values. A value that is
// missing or is not a string is logged as "(unknown)".
// NOTE(review): the signature line declaring `query` is absent from this
// listing - confirm against the original file.
1636 void
1638 const ConstElementPtr& args) const {
1639 // If there are no arguments, it means that the update was successful.
1640 if (!args || (args->getType() != Element::map)) {
1641 return;
1642 }
1643
1644 // Instead of duplicating the code between the failed-deleted-leases and
1645 // failed-leases, let's just have one function that does it for both.
1646 auto log_proc = [query, args](const std::string& param_name, const log::MessageID& mesid) {
1647 // Check if there are any failed leases.
1648 auto failed_leases = args->get(param_name);
1649
1650 // The failed leases must be a list.
1651 if (failed_leases && (failed_leases->getType() == Element::list)) {
1652 // Go over the failed leases and log each of them.
1653 for (unsigned i = 0; i < failed_leases->size(); ++i) {
1654 auto lease = failed_leases->get(i);
1655 if (lease->getType() == Element::map) {
1656
1657 // ip-address
1658 auto ip_address = lease->get("ip-address");
1659
1660 // lease type
1661 auto lease_type = lease->get("type");
1662
1663 // error-message
1664 auto error_message = lease->get("error-message");
1665
1666 LOG_INFO(ha_logger, mesid)
1667 .arg(query->getLabel())
1668 .arg(lease_type && (lease_type->getType() == Element::string) ?
1669 lease_type->stringValue() : "(unknown)")
1670 .arg(ip_address && (ip_address->getType() == Element::string) ?
1671 ip_address->stringValue() : "(unknown)")
1672 .arg(error_message && (error_message->getType() == Element::string) ?
1673 error_message->stringValue() : "(unknown)");
1674 }
1675 }
1676 }
1677 };
1678
1679 // Process "failed-deleted-leases"
1680 log_proc("failed-deleted-leases", HA_LEASE_UPDATE_DELETE_FAILED_ON_PEER);
1681
1682 // Process "failed-leases".
1683 log_proc("failed-leases", HA_LEASE_UPDATE_CREATE_UPDATE_FAILED_ON_PEER);
1684}
1685
// Builds and returns a map describing the HA status of the servers.
//
// The "local" entry holds this server's "role", "state" (an empty string
// when converting the state to text throws), "scopes", "server-name" and
// "system-time" (a null element when no time has been recorded). The
// "remote" entry is based on communication_state_->getReport() with "role"
// and "server-name" added; it is omitted in the passive-backup mode and
// when this server is a backup server.
// NOTE(review): the signature line and the declarations of `local`, `role`
// and `list` are absent from this listing - confirm against the original
// file.
1688 ElementPtr ha_servers = Element::createMap();
1689
1690 // Local part
1693 role = config_->getThisServerConfig()->getRole();
1694 std::string role_txt = HAConfig::PeerConfig::roleToString(role);
1695 local->set("role", Element::create(role_txt));
1696 int state = getCurrState();
1697 try {
1698 local->set("state", Element::create(stateToString(state)));
1699
1700 } catch (...) {
1701 // Empty string on error.
1702 local->set("state", Element::create(std::string()));
1703 }
1704 std::set<std::string> scopes = query_filter_.getServedScopes();
1706 for (auto const& scope : scopes) {
1707 list->add(Element::create(scope));
1708 }
1709 local->set("scopes", list);
1710 local->set("server-name", Element::create(config_->getThisServerName()));
1711 auto const my_time(communication_state_->getMyTimeAtSkew());
1712 if (my_time.is_not_a_date_time()) {
1713 local->set("system-time", Element::create());
1714 } else {
1715 local->set("system-time", Element::create(ptimeToText(my_time, 0)));
1716 }
1717 ha_servers->set("local", local);
1718
1719 // Do not include remote server information if this is a backup server or
1720 // we're in the passive-backup mode.
1721 if ((config_->getHAMode() == HAConfig::PASSIVE_BACKUP) ||
1722 (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::BACKUP)) {
1723 return (ha_servers);
1724 }
1725
1726 // Remote part
1727 ElementPtr remote = communication_state_->getReport();
1728
1729 try {
1730 role = config_->getFailoverPeerConfig()->getRole();
1731 role_txt = HAConfig::PeerConfig::roleToString(role);
1732 remote->set("role", Element::create(role_txt));
1733
1734 } catch (...) {
1735 remote->set("role", Element::create(std::string()));
1736 }
1737 remote->set("server-name", Element::create(config_->getFailoverPeerConfig()->getName()));
1738 ha_servers->set("remote", remote);
1739
1740 return (ha_servers);
1741}
1742
// Builds the answer carrying this server's status: a success result with
// the text "HA peer status returned." and an arguments map holding "state"
// (label of the current state), "date-time" (produced by
// HttpDateTime().rfc1123Format()), "scopes" (list of served scopes) and
// "unsent-update-count" (taken from communication_state_).
// NOTE(review): the signature line is absent from this listing - confirm
// against the original file.
1745 ElementPtr arguments = Element::createMap();
1746 std::string state_label = getState(getCurrState())->getLabel();
1747 arguments->set("state", Element::create(state_label));
1748
1749 std::string date_time = HttpDateTime().rfc1123Format();
1750 arguments->set("date-time", Element::create(date_time));
1751
1752 auto scopes = query_filter_.getServedScopes();
1753 ElementPtr scopes_list = Element::createList();
1754 for (auto const& scope : scopes) {
1755 scopes_list->add(Element::create(scope));
1756 }
1757 arguments->set("scopes", scopes_list);
1758
1759 arguments->set("unsent-update-count",
1760 Element::create(static_cast<int64_t>(communication_state_->getUnsentUpdateCount())));
1761
1762 return (createAnswer(CONTROL_RESULT_SUCCESS, "HA peer status returned.",
1763 arguments));
1764}
1765
// Returns a success answer in both visible paths: one stating that the
// state machine is already in the WAITING state when the current state is
// HA_WAITING_ST, and "HA state machine reset." otherwise.
// NOTE(review): the signature line and the statements executed before the
// second answer is returned are absent from this listing - presumably they
// perform the actual reset; confirm against the original file.
1768 if (getCurrState() == HA_WAITING_ST) {
1769 return (createAnswer(CONTROL_RESULT_SUCCESS, "HA state machine already in WAITING state."));
1770 }
1773 return (createAnswer(CONTROL_RESULT_SUCCESS, "HA state machine reset."));
1774}
1775
// Sends a heartbeat command to the failover peer and handles the response.
//
// An HTTP POST request with the command built by
// CommandCreator::createHeartbeat() is scheduled through client_. The
// completion handler:
// - treats an IO error, an HTTP parsing error, or an exception thrown while
//   validating the response as a heartbeat failure and logs it;
// - on a valid response stores the partner's "state" and "date-time" (both
//   mandatory strings), the optional "scopes" and the optional integer
//   "unsent-update-count" in communication_state_;
// - calls poke() on success, or marks the partner unavailable on failure
//   (logging when the communication is found interrupted);
// - restarts the heartbeat, posts HA_SYNCED_PARTNER_UNAVAILABLE_EVT when the
//   saved sync_complete_notified flag was set and the heartbeat failed, and
//   runs the state machine with HA_HEARTBEAT_COMPLETE_EVT.
//
// NOTE(review): the signature line, the statement following the copy of
// sync_complete_notified_, part of the request construction, one log
// statement opening and one argument of asyncSendRequest() are absent from
// this listing - confirm against the original file.
1776 void
1778 HAConfig::PeerConfigPtr partner_config = config_->getFailoverPeerConfig();
1779
1780 // If the sync_complete_notified_ is true it means that the partner
1781 // notified us that it had completed lease database synchronization.
1782 // We confirm that the partner is operational by sending the heartbeat
1783 // to it. Regardless if the partner responds to our heartbeats or not,
1784 // we should clear this flag. But, since we need the current value in
1785 // the async call handler, we save it in the local variable before
1786 // clearing it.
1787 bool sync_complete_notified = sync_complete_notified_;
1789
1790 // Create HTTP/1.1 request including our command.
1791 PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
1793 HostHttpHeader(partner_config->getUrl().getStrippedHostname()));
1794 partner_config->addBasicAuthHttpHeader(request);
1795 request->setBodyAsJson(CommandCreator::createHeartbeat(config_->getThisServerName(),
1796 server_type_));
1797 request->finalize();
1798
1799 // Response object should also be created because the HTTP client needs
1800 // to know the type of the expected response.
1801 HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
1802
1803 // Schedule asynchronous HTTP request.
1804 client_->asyncSendRequest(partner_config->getUrl(),
1805 partner_config->getTlsContext(),
1806 request, response,
1807 [this, partner_config, sync_complete_notified]
1808 (const boost::system::error_code& ec,
1809 const HttpResponsePtr& http_response,
1810 const std::string& error_str) {
1811
1812 // There are three possible groups of errors during the heartbeat.
1813 // One is the IO error causing issues in communication with the peer.
1814 // Another one is an HTTP parsing error. The last type of error is
1815 // when non-success error code is returned in the response carried
1816 // in the HTTP message or if the JSON response is otherwise broken.
1817
1818 bool heartbeat_success = true;
1819
1820 // Handle first two groups of errors.
1821 if (ec || !error_str.empty()) {
1822 LOG_WARN(ha_logger, HA_HEARTBEAT_COMMUNICATIONS_FAILED)
1823 .arg(config_->getThisServerName())
1824 .arg(partner_config->getLogLabel())
1825 .arg(ec ? ec.message() : error_str);
1826 heartbeat_success = false;
1827
1828 } else {
1829
1830 // Handle third group of errors.
1831 try {
1832 // Response must contain arguments and the arguments must
1833 // be a map.
1834 int rcode = 0;
1835 ConstElementPtr args = verifyAsyncResponse(http_response, rcode);
1836 if (!args || args->getType() != Element::map) {
1837 isc_throw(CtrlChannelError, "returned arguments in the response"
1838 " must be a map");
1839 }
1840 // Response must include partner's state.
1841 ConstElementPtr state = args->get("state");
1842 if (!state || state->getType() != Element::string) {
1843 isc_throw(CtrlChannelError, "server state not returned in response"
1844 " to a ha-heartbeat command or it is not a string");
1845 }
1846 // Remember the partner's state. This may throw if the returned
1847 // state is invalid.
1848 communication_state_->setPartnerState(state->stringValue());
1849
1850 ConstElementPtr date_time = args->get("date-time");
1851 if (!date_time || date_time->getType() != Element::string) {
1852 isc_throw(CtrlChannelError, "date-time not returned in response"
1853 " to a ha-heartbeat command or it is not a string");
1854 }
1855 // Note the time returned by the partner to calculate the clock skew.
1856 communication_state_->setPartnerTime(date_time->stringValue());
1857
1858 // Remember the scopes served by the partner.
1859 try {
1860 auto scopes = args->get("scopes");
1861 communication_state_->setPartnerScopes(scopes);
1862
1863 } catch (...) {
1864 // We don't want to fail if the scopes are missing because
1865 // this would be incompatible with old HA hook library
1866 // versions. We may make it mandatory one day, but during
1867 // upgrades of existing HA setup it would be a real issue
1868 // if we failed here.
1869 }
1870
1871 // unsent-update-count was not present in earlier HA versions.
1872 // Let's check if the partner has sent the parameter. We initialized
1873 // the counter to 0, and it remains 0 if the partner doesn't send it.
1874 // It effectively means that we don't track partner's unsent updates
1875 // as in the earlier HA versions.
1876 auto unsent_update_count = args->get("unsent-update-count");
1877 if (unsent_update_count) {
1878 if (unsent_update_count->getType() != Element::integer) {
1879 isc_throw(CtrlChannelError, "unsent-update-count returned in"
1880 " the ha-heartbeat response is not an integer");
1881 }
1882 communication_state_->setPartnerUnsentUpdateCount(static_cast<uint64_t>
1883 (unsent_update_count->intValue()));
1884 }
1885
1886 } catch (const std::exception& ex) {
1888 .arg(config_->getThisServerName())
1889 .arg(partner_config->getLogLabel())
1890 .arg(ex.what());
1891 heartbeat_success = false;
1892 }
1893 }
1894
1895 // If heartbeat was successful, let's mark the connection with the
1896 // peer as healthy.
1897 if (heartbeat_success) {
1898 communication_state_->poke();
1899
1900 } else {
1901 // We were unable to retrieve partner's state, so let's mark it
1902 // as unavailable.
1903 communication_state_->setPartnerUnavailable();
1904 // Log if the communication is interrupted.
1905 if (communication_state_->isCommunicationInterrupted()) {
1906 LOG_WARN(ha_logger, HA_COMMUNICATION_INTERRUPTED)
1907 .arg(config_->getThisServerName())
1908 .arg(partner_config->getName());
1909 }
1910 }
1911
1912 startHeartbeat();
1913 // Even though the partner notified us about the synchronization completion,
1914 // we still can't communicate with the partner. Let's continue serving
1915 // the clients until the link is fixed.
1916 if (sync_complete_notified && !heartbeat_success) {
1917 postNextEvent(HA_SYNCED_PARTNER_UNAVAILABLE_EVT);
1918 }
1919 // Whatever the result of the heartbeat was, the state machine needs
1920 // to react to this. Let's run the state machine until the state machine
1921 // finds that some new events are required, i.e. next heartbeat or
1922 // lease update. The runModel() may transition to another state, schedule
1923 // asynchronous tasks etc. Then it returns control to the DHCP server.
1924 runModel(HA_HEARTBEAT_COMPLETE_EVT);
1925 },
1927 std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
1928 std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
1929 std::bind(&HAService::clientCloseHandler, this, ph::_1)
1930 );
1931}
1932
// Acts only when communication_state_ reports that the heartbeat is not
// running.
// NOTE(review): the signature line and the statement executed inside the
// condition are absent from this listing - presumably the heartbeat is
// started there; confirm against the original file.
1933 void
1935 if (!communication_state_->isHeartbeatRunning()) {
1937 }
1938}
1939
// Starts the heartbeat through communication_state_ using the heartbeat
// delay from config_. Nothing is done when the configured delay is not
// greater than zero.
// NOTE(review): the signature line and the first part of the second
// argument passed to communication_state_->startHeartbeat() are absent from
// this listing - confirm against the original file.
1940 void
1942 if (config_->getHeartbeatDelay() > 0) {
1943 communication_state_->startHeartbeat(config_->getHeartbeatDelay(),
1945 this));
1946 }
1947}
1948
// Sends a command disabling the DHCP service on the given peer for at most
// `max_period` and reports the outcome through `post_request_action`.
//
// The command is built by CommandCreator::createDHCPDisable() and scheduled
// through `http_client`. In the completion handler an IO error, an HTTP
// parsing error or an exception thrown by verifyAsyncResponse() is logged
// and stored in error_message. A non-empty error_message causes the partner
// to be marked unavailable. When `post_request_action` is set it is invoked
// with the success flag (error_message empty), the error message and the
// result code.
// NOTE(review): the signature line declaring `http_client`, part of the
// request construction, one log statement opening and one argument of
// asyncSendRequest() are absent from this listing - confirm against the
// original file.
1949 void
1951 const HAConfig::PeerConfigPtr& remote_config,
1952 const unsigned int max_period,
1953 PostRequestCallback post_request_action) {
1954 // Create HTTP/1.1 request including our command.
1955 PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
1957 HostHttpHeader(remote_config->getUrl().getStrippedHostname()));
1958
1959 remote_config->addBasicAuthHttpHeader(request);
1960 request->setBodyAsJson(CommandCreator::createDHCPDisable(getRemoteOrigin(),
1961 max_period,
1962 server_type_));
1963 request->finalize();
1964
1965 // Response object should also be created because the HTTP client needs
1966 // to know the type of the expected response.
1967 HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
1968
1969 // Schedule asynchronous HTTP request.
1970 http_client.asyncSendRequest(remote_config->getUrl(),
1971 remote_config->getTlsContext(),
1972 request, response,
1973 [this, remote_config, post_request_action]
1974 (const boost::system::error_code& ec,
1975 const HttpResponsePtr& http_response,
1976 const std::string& error_str) {
1977
1978 // There are three possible groups of errors while disabling the DHCP
1979 // service on the peer. One is the IO error causing issues in
1980 // communication with the peer. Another one is an HTTP parsing error.
1981 // The last type of error is when non-success error code is returned
1982 // in the HTTP message or if the JSON response is otherwise broken.
1983
1984 int rcode = 0;
1985 std::string error_message;
1986
1987 // Handle first two groups of errors.
1988 if (ec || !error_str.empty()) {
1989 error_message = (ec ? ec.message() : error_str);
1990 LOG_ERROR(ha_logger, HA_DHCP_DISABLE_COMMUNICATIONS_FAILED)
1991 .arg(config_->getThisServerName())
1992 .arg(remote_config->getLogLabel())
1993 .arg(error_message);
1994
1995 } else {
1996
1997 // Handle third group of errors.
1998 try {
1999 static_cast<void>(verifyAsyncResponse(http_response, rcode));
2000
2001 } catch (const std::exception& ex) {
2002 error_message = ex.what();
2004 .arg(config_->getThisServerName())
2005 .arg(remote_config->getLogLabel())
2006 .arg(error_message);
2007 }
2008 }
2009
2010 // If there was an error communicating with the partner, mark the
2011 // partner as unavailable.
2012 if (!error_message.empty()) {
2013 communication_state_->setPartnerUnavailable();
2014 }
2015
2016 // Invoke post request action if it was specified.
2017 if (post_request_action) {
2018 post_request_action(error_message.empty(),
2019 error_message,
2020 rcode);
2021 }
2022 },
2024 std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
2025 std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
2026 std::bind(&HAService::clientCloseHandler, this, ph::_1)
2027 );
2028}
2029
// Sends a command enabling the DHCP service on the given peer and reports
// the outcome through `post_request_action`.
//
// The command is built by CommandCreator::createDHCPEnable() and scheduled
// through `http_client`. In the completion handler an IO error, an HTTP
// parsing error or an exception thrown by verifyAsyncResponse() is logged
// and stored in error_message. A non-empty error_message causes the partner
// to be marked unavailable. When `post_request_action` is set it is invoked
// with the success flag (error_message empty), the error message and the
// result code.
// NOTE(review): the signature line declaring `http_client`, part of the
// request construction, one log statement opening and one argument of
// asyncSendRequest() are absent from this listing - confirm against the
// original file.
2030 void
2032 const HAConfig::PeerConfigPtr& remote_config,
2033 PostRequestCallback post_request_action) {
2034 // Create HTTP/1.1 request including our command.
2035 PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
2037 HostHttpHeader(remote_config->getUrl().getStrippedHostname()));
2038 remote_config->addBasicAuthHttpHeader(request);
2039 request->setBodyAsJson(CommandCreator::createDHCPEnable(getRemoteOrigin(),
2040 server_type_));
2041 request->finalize();
2042
2043 // Response object should also be created because the HTTP client needs
2044 // to know the type of the expected response.
2045 HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
2046
2047 // Schedule asynchronous HTTP request.
2048 http_client.asyncSendRequest(remote_config->getUrl(),
2049 remote_config->getTlsContext(),
2050 request, response,
2051 [this, remote_config, post_request_action]
2052 (const boost::system::error_code& ec,
2053 const HttpResponsePtr& http_response,
2054 const std::string& error_str) {
2055
2056 // There are three possible groups of errors while enabling the DHCP
2057 // service on the peer. One is the IO error causing issues in
2058 // communication with the peer. Another one is an HTTP parsing error.
2059 // The last type of error is when non-success error code is returned
2060 // in the HTTP message or if the JSON response is otherwise broken.
2061
2062 int rcode = 0;
2063 std::string error_message;
2064
2065 // Handle first two groups of errors.
2066 if (ec || !error_str.empty()) {
2067 error_message = (ec ? ec.message() : error_str);
2068 LOG_ERROR(ha_logger, HA_DHCP_ENABLE_COMMUNICATIONS_FAILED)
2069 .arg(config_->getThisServerName())
2070 .arg(remote_config->getLogLabel())
2071 .arg(error_message);
2072
2073 } else {
2074
2075 // Handle third group of errors.
2076 try {
2077 static_cast<void>(verifyAsyncResponse(http_response, rcode));
2078
2079 } catch (const std::exception& ex) {
2080 error_message = ex.what();
2082 .arg(config_->getThisServerName())
2083 .arg(remote_config->getLogLabel())
2084 .arg(error_message);
2085 }
2086 }
2087
2088 // If there was an error communicating with the partner, mark the
2089 // partner as unavailable.
2090 if (!error_message.empty()) {
2091 communication_state_->setPartnerUnavailable();
2092 }
2093
2094 // Invoke post request action if it was specified.
2095 if (post_request_action) {
2096 post_request_action(error_message.empty(),
2097 error_message,
2098 rcode);
2099 }
2100 },
2102 std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
2103 std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
2104 std::bind(&HAService::clientCloseHandler, this, ph::_1)
2105 );
2106}
2107
// Calls network_state_->disableService() with the origin returned by
// getLocalOrigin().
// NOTE(review): the signature line is absent from this listing - confirm
// against the original file.
2108 void
2110 network_state_->disableService(getLocalOrigin());
2111}
2112
// Calls network_state_->enableService() with the origin returned by
// getLocalOrigin().
// NOTE(review): the signature line is absent from this listing - confirm
// against the original file.
2113 void
2115 network_state_->enableService(getLocalOrigin());
2116}
2117
// Starts the lease synchronization with the failover peer using client_.
//
// The sync timeout from config_ is divided by 1000 (milliseconds to
// seconds) and raised to 1 when the result is 0; this value is passed as
// the period for which the peer's DHCP service is disabled.
// lease_sync_filter_ is applied before the synchronization is started. No
// last lease is supplied and the post-sync callback is a default
// constructed PostSyncCallback.
// NOTE(review): the signature line is absent from this listing - confirm
// against the original file.
2118 void
2120 PostSyncCallback null_action;
2121
2122 // Timeout is configured in milliseconds. Need to convert to seconds.
2123 unsigned int dhcp_disable_timeout =
2124 static_cast<unsigned int>(config_->getSyncTimeout() / 1000);
2125 if (dhcp_disable_timeout == 0) {
2126 // Ensure that we always use at least 1 second timeout.
2127 dhcp_disable_timeout = 1;
2128 }
2129
2130 lease_sync_filter_.apply();
2131 asyncSyncLeases(*client_, config_->getFailoverPeerConfig(),
2132 dhcp_disable_timeout, LeasePtr(), null_action);
2133}
2134
// Synchronizes leases from the given peer in two steps: first the peer's
// DHCP service is disabled via asyncDisableDHCPService(), then, if that
// succeeded, asyncSyncLeasesInternal() is called with the dhcp_disabled
// argument set to true. If disabling failed, `post_sync_action` is invoked
// with the failure flag, the error message and the `dhcp_disabled` value
// received by this function.
// NOTE(review): the signature line declaring `http_client` is absent from
// this listing - confirm against the original file.
// NOTE(review): the callback captures `http_client` by reference and runs
// asynchronously; the client presumably has to outlive the request -
// confirm with the callers.
// NOTE(review): `post_sync_action` is invoked in the failure branch without
// an emptiness check, whereas a default constructed PostSyncCallback is
// passed by another caller visible in this file - verify that invoking an
// empty callback is safe for the PostSyncCallback type.
2135 void
2137 const HAConfig::PeerConfigPtr& remote_config,
2138 const unsigned int max_period,
2139 const dhcp::LeasePtr& last_lease,
2140 PostSyncCallback post_sync_action,
2141 const bool dhcp_disabled) {
2142 // Synchronization starts with a command to disable DHCP service of the
2143 // peer from which we're fetching leases. We don't want the other server
2144 // to allocate new leases while we fetch from it. The DHCP service will
2145 // be disabled for a certain amount of time and will be automatically
2146 // re-enabled if we die during the synchronization.
2147 asyncDisableDHCPService(http_client, remote_config, max_period,
2148 [this, &http_client, remote_config, max_period, last_lease,
2149 post_sync_action, dhcp_disabled]
2150 (const bool success, const std::string& error_message, const int) {
2151
2152 // If we have successfully disabled the DHCP service on the peer,
2153 // we can start fetching the leases.
2154 if (success) {
2155 // The last argument indicates that disabling the DHCP
2156 // service on the partner server was successful.
2157 asyncSyncLeasesInternal(http_client, remote_config, max_period,
2158 last_lease, post_sync_action, true);
2159
2160 } else {
2161 post_sync_action(success, error_message, dhcp_disabled);
2162 }
2163 });
2164}
2165
// Fetches one page of leases from the peer and applies it to the local lease
// database.
//
// A leaseX-get-page command is built from last_lease and the configured sync
// page limit and sent to remote_config's URL. In the response handler every
// lease accepted by lease_sync_filter_ is:
//  - added when no lease with that address exists locally,
//  - used to update the local lease when the local cltt_ is older,
//  - skipped (debug log only) otherwise.
// Failures on individual leases are logged as warnings and do not abort the
// page. When the page is full (size >= sync page limit) the last lease of the
// page is remembered and asyncSyncLeases() is called again to request the next
// page. Otherwise, or when an error occurred, post_sync_action (if set) is
// invoked with the success flag, the error message and dhcp_disabled. On
// error the partner is also marked unavailable.
//
// The response lambda captures http_client by reference, so the client must
// outlive the request.
// NOTE(review): listing lines 2167, 2175, 2178 and 2347 are absent from this
// extract. They presumably carried the function header, the request
// constructor arguments, the DHCPv4 test opening the first branch and the
// logging macro respectively - confirm against the original file.
2166void
2168 const HAConfig::PeerConfigPtr& remote_config,
2169 const unsigned int max_period,
2170 const dhcp::LeasePtr& last_lease,
2171 PostSyncCallback post_sync_action,
2172 const bool dhcp_disabled) {
2173 // Create HTTP/1.1 request including our command.
2174 PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
2176 HostHttpHeader(remote_config->getUrl().getStrippedHostname()));
2177 remote_config->addBasicAuthHttpHeader(request);
2179 request->setBodyAsJson(CommandCreator::createLease4GetPage(
2180 boost::dynamic_pointer_cast<Lease4>(last_lease), config_->getSyncPageLimit()));
2181
2182 } else {
2183 request->setBodyAsJson(CommandCreator::createLease6GetPage(
2184 boost::dynamic_pointer_cast<Lease6>(last_lease), config_->getSyncPageLimit()));
2185 }
2186 request->finalize();
2187
2188 // Response object should also be created because the HTTP client needs
2189 // to know the type of the expected response.
2190 HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
2191
2192 // Schedule asynchronous HTTP request.
2193 http_client.asyncSendRequest(remote_config->getUrl(),
2194 remote_config->getTlsContext(),
2195 request, response,
2196 [this, remote_config, post_sync_action, &http_client, max_period, dhcp_disabled]
2197 (const boost::system::error_code& ec,
2198 const HttpResponsePtr& http_response,
2199 const std::string& error_str) {
2200
2201 // Holds last lease received on the page of leases. If the last
2202 // page was hit, this value remains null.
2203 LeasePtr last_lease_in_callback;
2204
2205 // There are three possible groups of errors while fetching the leases.
2206 // One is the IO error causing issues in communication with the peer.
2207 // Another one is an HTTP parsing error. The last type of error is
2208 // when non-success error code is returned in the response carried
2209 // in the HTTP message or if the JSON response is otherwise broken.
2210
2211 std::string error_message;
2212
2213 // Handle first two groups of errors.
2214 if (ec || !error_str.empty()) {
2215 error_message = (ec ? ec.message() : error_str);
2216 LOG_ERROR(ha_logger, HA_LEASES_SYNC_COMMUNICATIONS_FAILED)
2217 .arg(config_->getThisServerName())
2218 .arg(remote_config->getLogLabel())
2219 .arg(error_message);
2220
2221 } else {
2222 // Handle third group of errors.
2223 try {
2224 int rcode = 0;
2225 ConstElementPtr args = verifyAsyncResponse(http_response, rcode);
2226
2227 // Arguments must be a map.
     // NOTE(review): this test only rejects a non-null args that is not a
     // map. A null args passes it and is dereferenced by args->get() below.
     // Confirm that verifyAsyncResponse() cannot return null here, or
     // tighten the condition.
2228 if (args && (args->getType() != Element::map)) {
2229 isc_throw(CtrlChannelError,
2230 "arguments in the received response must be a map");
2231 }
2232
2233 ConstElementPtr leases = args->get("leases");
2234 if (!leases || (leases->getType() != Element::list)) {
2235 isc_throw(CtrlChannelError,
2236 "server response does not contain leases argument or this"
2237 " argument is not a list");
2238 }
2239
2240 // Iterate over the leases and update the database as appropriate.
2241 auto const& leases_element = leases->listValue();
2242
2243 LOG_INFO(ha_logger, HA_LEASES_SYNC_LEASE_PAGE_RECEIVED)
2244 .arg(config_->getThisServerName())
2245 .arg(leases_element.size())
2246 .arg(remote_config->getLogLabel());
2247
2248 // Count actually applied leases.
2249 uint64_t applied_lease_count = 0;
2250 for (auto l = leases_element.begin(); l != leases_element.end(); ++l) {
     // Each lease is handled in its own try block so that one malformed or
     // rejected lease does not abort processing of the whole page.
     // NOTE(review): if parsing the final lease of a full page throws, the
     // "last lease" marker below is never set and paging ends as if this
     // were the last page - confirm this is acceptable.
2251 try {
2252
2253 if (server_type_ == HAServerType::DHCPv4) {
2254 Lease4Ptr lease = Lease4::fromElement(*l);
2255
2256 // If we're not on the last page and we're processing final lease on
2257 // this page, let's record the lease as input to the next
2258 // lease4-get-page command.
2259 if ((leases_element.size() >= config_->getSyncPageLimit()) &&
2260 (l + 1 == leases_element.end())) {
2261 last_lease_in_callback = boost::dynamic_pointer_cast<Lease>(lease);
2262 }
2263
     // The paging marker above is recorded before filtering, so a filtered
     // out lease can still serve as the starting point of the next page.
2264 if (!lease_sync_filter_.shouldSync(lease)) {
2265 continue;
2266 }
2267
2268 // Check if there is such lease in the database already.
2269 Lease4Ptr existing_lease = LeaseMgrFactory::instance().getLease4(lease->addr_);
2270 if (!existing_lease) {
2271 // There is no such lease, so let's add it.
2272 LeaseMgrFactory::instance().addLease(lease);
2273 ++applied_lease_count;
2274 LeaseMgr::updateStatsOnAdd(lease);
2275 } else if (existing_lease->cltt_ < lease->cltt_) {
2276 // If the existing lease is older than the fetched lease, update
2277 // the lease in our local database.
2278 // Update lease current expiration time with value received from the
2279 // database. Some database backends reject operations on the lease if
2280 // the current expiration time value does not match what is stored.
2281 Lease::syncCurrentExpirationTime(*existing_lease, *lease);
2282 LeaseMgrFactory::instance().updateLease4(lease);
2283 ++applied_lease_count;
2284 LeaseMgr::updateStatsOnUpdate(existing_lease, lease);
2285 } else {
2286 LOG_DEBUG(ha_logger, DBGLVL_TRACE_BASIC, HA_LEASE_SYNC_STALE_LEASE4_SKIP)
2287 .arg(config_->getThisServerName())
2288 .arg(lease->addr_.toText())
2289 .arg(lease->subnet_id_);
2290 }
2291
2292 } else {
2293 Lease6Ptr lease = Lease6::fromElement(*l);
2294
2295 // If we're not on the last page and we're processing final lease on
2296 // this page, let's record the lease as input to the next
2297 // lease6-get-page command.
2298 if ((leases_element.size() >= config_->getSyncPageLimit()) &&
2299 (l + 1 == leases_element.end())) {
2300 last_lease_in_callback = boost::dynamic_pointer_cast<Lease>(lease);
2301 }
2302
2303 if (!lease_sync_filter_.shouldSync(lease)) {
2304 continue;
2305 }
2306
2307 // Check if there is such lease in the database already.
2308 Lease6Ptr existing_lease = LeaseMgrFactory::instance().getLease6(lease->type_,
2309 lease->addr_);
2310 if (!existing_lease) {
2311 // There is no such lease, so let's add it.
2312 LeaseMgrFactory::instance().addLease(lease);
2313 ++applied_lease_count;
2314 LeaseMgr::updateStatsOnAdd(lease);
2315 } else if (existing_lease->cltt_ < lease->cltt_) {
2316 // If the existing lease is older than the fetched lease, update
2317 // the lease in our local database.
2318 // Update lease current expiration time with value received from the
2319 // database. Some database backends reject operations on the lease if
2320 // the current expiration time value does not match what is stored.
2321 Lease::syncCurrentExpirationTime(*existing_lease, *lease);
2322 LeaseMgrFactory::instance().updateLease6(lease);
2323 ++applied_lease_count;
2324 LeaseMgr::updateStatsOnUpdate(existing_lease, lease);
2325 } else {
2326 LOG_DEBUG(ha_logger, DBGLVL_TRACE_BASIC, HA_LEASE_SYNC_STALE_LEASE6_SKIP)
2327 .arg(config_->getThisServerName())
2328 .arg(lease->addr_.toText())
2329 .arg(lease->subnet_id_);
2330 }
2331 }
2332
2333 } catch (const std::exception& ex) {
2334 LOG_WARN(ha_logger, HA_LEASE_SYNC_FAILED)
2335 .arg(config_->getThisServerName())
2336 .arg((*l)->str())
2337 .arg(ex.what());
2338 }
2339 }
2340
2341 LOG_INFO(ha_logger, HA_LEASES_SYNC_APPLIED_LEASES)
2342 .arg(config_->getThisServerName())
2343 .arg(applied_lease_count);
2344
2345 } catch (const std::exception& ex) {
2346 error_message = ex.what();
2348 .arg(config_->getThisServerName())
2349 .arg(remote_config->getLogLabel())
2350 .arg(error_message);
2351 }
2352 }
2353
2354 // If there was an error communicating with the partner, mark the
2355 // partner as unavailable.
2356 if (!error_message.empty()) {
2357 communication_state_->setPartnerUnavailable();
2358
2359 } else if (last_lease_in_callback) {
2360 // This indicates that there are more leases to be fetched.
2361 // Therefore, we have to send another leaseX-get-page command.
2362 asyncSyncLeases(http_client, remote_config, max_period, last_lease_in_callback,
2363 post_sync_action, dhcp_disabled);
2364 return;
2365 }
2366
2367 // Invoke post synchronization action if it was specified.
2368 if (post_sync_action) {
2369 post_sync_action(error_message.empty(),
2370 error_message,
2371 dhcp_disabled);
2372 }
2373 },
2374 HttpClient::RequestTimeout(config_->getSyncTimeout()),
2375 std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
2376 std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
2377 std::bind(&HAService::clientCloseHandler, this, ph::_1)
2378 );
2379
2380}
2381
// Handles a request to synchronize the lease database with the named server.
//
// server_name is resolved to a peer configuration; an error answer is returned
// when the lookup throws or when the name designates this server itself.
// Otherwise synchronize() is run and its status code and message are wrapped
// in the returned answer.
//
// max_period is passed through to synchronize() unchanged.
// NOTE(review): listing line 2382 is absent from this extract; it presumably
// carried the return type - confirm against the original file.
2383HAService::processSynchronize(const std::string& server_name,
2384 const unsigned int max_period) {
2385 HAConfig::PeerConfigPtr remote_config;
2386 try {
2387 remote_config = config_->getPeerConfig(server_name);
2388 } catch (const std::exception& ex) {
2389 return (createAnswer(CONTROL_RESULT_ERROR, ex.what()));
2390 }
2391 // We must not synchronize with self.
2392 if (remote_config->getName() == config_->getThisServerName()) {
2393 return (createAnswer(CONTROL_RESULT_ERROR, "'" + remote_config->getName()
2394 + "' points to local server but should point to a partner"));
2395 }
2396 std::string answer_message;
2397 int sync_status = synchronize(answer_message, remote_config, max_period);
2398 return (createAnswer(sync_status, answer_message));
2399}
2400
// Synchronizes the lease database with the given peer and blocks until done.
//
// The work runs on a private IOService and HttpClient created here. The
// asynchronous steps are chained through callbacks:
//  1. asyncSyncLeases() fetches the leases.
//  2. If the peer's DHCP service was disabled along the way:
//     - after a successful fetch, ha-sync-complete-notify is sent, with a
//       fallback to dhcp-enable (see the note at the missing line below);
//     - after a failed fetch, dhcp-enable is sent directly.
//  3. Every terminal callback stops the IOService so run() returns.
// Only the first error message is kept in status_message.
//
// The lambdas capture locals by reference ([&]). This relies on
// io_service->run() being executed inside this function, before the locals go
// out of scope.
//
// status_message receives the error text on failure, or "Lease database
// synchronization complete." on success.
// Returns CONTROL_RESULT_ERROR when an error message was recorded,
// CONTROL_RESULT_SUCCESS otherwise.
// NOTE(review): listing lines 2435, 2492, 2513, 2515, 2526 and 2528 are absent
// from this extract; apart from 2435 they appear to be logging macro lines
// (only the .arg() continuations remain) - confirm against the original file.
2401int
2402HAService::synchronize(std::string& status_message,
2403 const HAConfig::PeerConfigPtr& remote_config,
2404 const unsigned int max_period) {
2405 lease_sync_filter_.apply();
2406
2407 IOServicePtr io_service(new IOService());
2408 HttpClient client(io_service, false);
2409
2410 asyncSyncLeases(client, remote_config, max_period, Lease4Ptr(),
2411 [&](const bool success, const std::string& error_message,
2412 const bool dhcp_disabled) {
2413 // If there was a fatal error while fetching the leases, let's
2414 // log an error message so as it can be included in the response
2415 // to the controlling client.
2416 if (!success) {
2417 status_message = error_message;
2418 }
2419
2420 // Whether or not there was an error while fetching the leases,
2421 // we need to re-enable the DHCP service on the peer if the
2422 // DHCP service was disabled in the course of synchronization.
2423 if (dhcp_disabled) {
2424 // If the synchronization was completed successfully let's
2425 // try to send the ha-sync-complete-notify command to the
2426 // partner.
2427 if (success) {
2428 asyncSyncCompleteNotify(client, remote_config,
2429 [&](const bool success_complete_notify,
2430 const std::string& error_message_complete_notify,
2431 const int rcode) {
2432 // This command may not be supported by the partner when it
2433 // runs an older Kea version. In that case, send the dhcp-enable
2434 // command as in previous Kea version.
     // NOTE(review): listing line 2435 is absent here. The "} else {" further
     // down implies it opened a conditional, presumably testing rcode for the
     // "command unsupported" result - confirm against the original file.
2436 asyncEnableDHCPService(client, remote_config,
2437 [&](const bool success_enable_dhcp,
2438 const std::string& error_message_enable_dhcp,
2439 const int) {
2440 // It is possible that we have already recorded an error
2441 // message while synchronizing the lease database. Don't
2442 // override the existing error message.
2443 if (!success_enable_dhcp && status_message.empty()) {
2444 status_message = error_message_enable_dhcp;
2445 }
2446
2447 // The synchronization process is completed, so let's break
2448 // the IO service so as we can return the response to the
2449 // controlling client.
2450 io_service->stop();
2451 });
2452
2453 } else {
2454 // ha-sync-complete-notify command was delivered to the partner.
2455 // The synchronization process ends here.
2456 if (!success_complete_notify && status_message.empty()) {
2457 status_message = error_message_complete_notify;
2458 }
2459
2460 io_service->stop();
2461 }
2462 });
2463
2464 } else {
2465 // Synchronization was unsuccessful. Send the dhcp-enable command to
2466 // re-enable the DHCP service. Note, that we don't send the
2467 // ha-sync-complete-notify command in this case. It is only sent in
2468 // the case when synchronization ends successfully.
2469 asyncEnableDHCPService(client, remote_config,
2470 [&](const bool success_enable_dhcp,
2471 const std::string& error_message_enable_dhcp,
2472 const int) {
2473 if (!success_enable_dhcp && status_message.empty()) {
2474 status_message = error_message_enable_dhcp;
2475 }
2476
2477 // The synchronization process is completed, so let's break
2478 // the IO service so as we can return the response to the
2479 // controlling client.
2480 io_service->stop();
2481
2482 });
2483 }
2484
2485 } else {
2486 // Also stop IO service if there is no need to enable DHCP
2487 // service.
2488 io_service->stop();
2489 }
2490 });
2491
2493 .arg(config_->getThisServerName())
2494 .arg(remote_config->getLogLabel());
2495
2496 // Measure duration of the synchronization.
2497 Stopwatch stopwatch;
2498
2499 // Run the IO service until it is stopped by any of the callbacks. This
2500 // makes it synchronous.
2501 io_service->run();
2502
2503 // End measuring duration.
2504 stopwatch.stop();
2505
     // Shut the private client down and drain the private IO service before
     // both go out of scope.
2506 client.stop();
2507
2508 io_service->stopAndPoll();
2509
2510 // If an error message has been recorded, return an error to the controlling
2511 // client.
2512 if (!status_message.empty()) {
2514
2516 .arg(config_->getThisServerName())
2517 .arg(remote_config->getLogLabel())
2518 .arg(status_message);
2519
2520 return (CONTROL_RESULT_ERROR);
2521
2522 }
2523
2524 // Everything was fine, so let's return a success.
2525 status_message = "Lease database synchronization complete.";
2527
2529 .arg(config_->getThisServerName())
2530 .arg(remote_config->getLogLabel())
2531 .arg(stopwatch.logFormatLastDuration());
2532
2533 return (CONTROL_RESULT_SUCCESS);
2534}
2535
// Sends the lease updates queued in lease_update_backlog_ to a peer, one
// request at a time.
//
// When the backlog is empty, post_request_action is called immediately with
// success. Otherwise a command is built from the backlog and posted to the
// peer. In the visible branch one lease is popped and turned into a
// lease4 update command for the ADD operation, or a lease4 delete command for
// any other operation. When the response is good the function calls itself
// again, so the recursion ends once the backlog is drained. On the first
// error post_request_action is called with the failure, the error message
// and the result code.
//
// The response lambda captures http_client by reference, so the client must
// outlive the request.
// NOTE(review): post_request_action is invoked without an emptiness check
// in this function; callers presumably always supply a callback - confirm.
// NOTE(review): listing lines 2537-2538, 2546-2547, 2556, 2561 and 2594 are
// absent from this extract. They presumably carried the function header and
// first parameters, the op_type declaration and DHCPv4 test, the body of the
// else branch, the request constructor arguments and a logging macro -
// confirm against the original file.
2536void
2539 PostRequestCallback post_request_action) {
2540 if (lease_update_backlog_.size() == 0) {
2541 post_request_action(true, "", CONTROL_RESULT_SUCCESS);
2542 return;
2543 }
2544
2545 ConstElementPtr command;
2548 Lease4Ptr lease = boost::dynamic_pointer_cast<Lease4>(lease_update_backlog_.pop(op_type));
2549 if (op_type == LeaseUpdateBacklog::ADD) {
2550 command = CommandCreator::createLease4Update(*lease);
2551 } else {
2552 command = CommandCreator::createLease4Delete(*lease);
2553 }
2554
2555 } else {
2557 }
2558
2559 // Create HTTP/1.1 request including our command.
2560 PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
2562 HostHttpHeader(config->getUrl().getStrippedHostname()));
2563 config->addBasicAuthHttpHeader(request);
2564 request->setBodyAsJson(command);
2565 request->finalize();
2566
2567 // Response object should also be created because the HTTP client needs
2568 // to know the type of the expected response.
2569 HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
2570
2571 http_client.asyncSendRequest(config->getUrl(), config->getTlsContext(),
2572 request, response,
2573 [this, &http_client, config, post_request_action]
2574 (const boost::system::error_code& ec,
2575 const HttpResponsePtr& http_response,
2576 const std::string& error_str) {
2577
2578 int rcode = 0;
2579 std::string error_message;
2580
     // Handle IO errors and HTTP parsing errors first.
2581 if (ec || !error_str.empty()) {
2582 error_message = (ec ? ec.message() : error_str);
2583 LOG_WARN(ha_logger, HA_LEASES_BACKLOG_COMMUNICATIONS_FAILED)
2584 .arg(config_->getThisServerName())
2585 .arg(config->getLogLabel())
2586 .arg(ec ? ec.message() : error_str);
2587
2588 } else {
2589 // Handle third group of errors.
2590 try {
     // Only the validation side effects (rcode, possible exception) are
     // used; the returned arguments are not inspected.
2591 auto args = verifyAsyncResponse(http_response, rcode);
2592 } catch (const std::exception& ex) {
2593 error_message = ex.what();
2595 .arg(config_->getThisServerName())
2596 .arg(config->getLogLabel())
2597 .arg(ex.what());
2598 }
2599 }
2600
2601 // Recursively send all outstanding lease updates or break when an
2602 // error occurs. In DHCPv6, this is a single iteration because we use
2603 // lease6-bulk-apply, which combines many lease updates in a single
2604 // transaction. In the case of DHCPv4, each update is sent in its own
2605 // transaction.
2606 if (error_message.empty()) {
2607 asyncSendLeaseUpdatesFromBacklog(http_client, config, post_request_action);
2608 } else {
2609 post_request_action(error_message.empty(), error_message, rcode);
2610 }
2611 });
2612}
2613
// Sends all backlogged lease updates to the failover peer and blocks until
// the transfer has finished.
//
// Returns true immediately when the backlog is empty. Otherwise a private
// IOService and HttpClient are created, asyncSendLeaseUpdatesFromBacklog() is
// started, and the IOService is run until the completion callback stops it.
// The callback captures locals by reference ([&]); this relies on run()
// executing inside this function.
//
// Returns the success flag reported by the completion callback.
// NOTE(review): listing lines 2615, 2618, 2628 and 2653 are absent from this
// extract. They presumably carried the function header and three logging
// macro lines (only the .arg() continuations remain) - confirm against the
// original file.
2614bool
2616 auto num_updates = lease_update_backlog_.size();
2617 if (num_updates == 0) {
2619 .arg(config_->getThisServerName());
2620 return (true);
2621 }
2622
2623 IOServicePtr io_service(new IOService());
2624 HttpClient client(io_service, false);
2625 auto remote_config = config_->getFailoverPeerConfig();
2626 bool updates_successful = true;
2627
2629 .arg(config_->getThisServerName())
2630 .arg(num_updates)
2631 .arg(remote_config->getName());
2632
2633 asyncSendLeaseUpdatesFromBacklog(client, remote_config,
2634 [&](const bool success, const std::string&, const int) {
2635 io_service->stop();
2636 updates_successful = success;
2637 });
2638
2639 // Measure duration of the updates.
2640 Stopwatch stopwatch;
2641
2642 // Run the IO service until it is stopped by the callback. This makes it synchronous.
2643 io_service->run();
2644
2645 // End measuring duration.
2646 stopwatch.stop();
2647
     // Shut the private client down and drain the private IO service before
     // both go out of scope.
2648 client.stop();
2649
2650 io_service->stopAndPoll();
2651
2652 if (updates_successful) {
2654 .arg(config_->getThisServerName())
2655 .arg(remote_config->getName())
2656 .arg(stopwatch.logFormatLastDuration());
2657 }
2658
2659 return (updates_successful);
2660}
2661
// Sends the HA reset command (built by CommandCreator::createHAReset) to a
// peer asynchronously.
//
// post_request_action is always called once the response handler runs. It
// receives true and an empty message on success, or false with the error text
// on an IO error, an HTTP parsing error or a response that fails
// verifyAsyncResponse(). The third argument is the result code set by
// verifyAsyncResponse(), or 0 when the response was never verified.
// NOTE(review): post_request_action is invoked without an emptiness check;
// callers presumably always supply a callback - confirm.
// NOTE(review): listing lines 2663-2664, 2671 and 2704 are absent from this
// extract. They presumably carried the function header and first parameters,
// the request constructor arguments and a logging macro - confirm against the
// original file.
2662void
2665 PostRequestCallback post_request_action) {
2666 ConstElementPtr command = CommandCreator::createHAReset(config_->getThisServerName(),
2667 server_type_);
2668
2669 // Create HTTP/1.1 request including our command.
2670 PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
2672 HostHttpHeader(config->getUrl().getStrippedHostname()));
2673 config->addBasicAuthHttpHeader(request);
2674 request->setBodyAsJson(command);
2675 request->finalize();
2676
2677 // Response object should also be created because the HTTP client needs
2678 // to know the type of the expected response.
2679 HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
2680
2681 http_client.asyncSendRequest(config->getUrl(), config->getTlsContext(),
2682 request, response,
2683 [this, config, post_request_action]
2684 (const boost::system::error_code& ec,
2685 const HttpResponsePtr& http_response,
2686 const std::string& error_str) {
2687
2688 int rcode = 0;
2689 std::string error_message;
2690
     // Handle IO errors and HTTP parsing errors first.
2691 if (ec || !error_str.empty()) {
2692 error_message = (ec ? ec.message() : error_str);
2693 LOG_WARN(ha_logger, HA_RESET_COMMUNICATIONS_FAILED)
2694 .arg(config_->getThisServerName())
2695 .arg(config->getLogLabel())
2696 .arg(ec ? ec.message() : error_str);
2697
2698 } else {
2699 // Handle third group of errors.
2700 try {
     // Only the validation side effects (rcode, possible exception) are
     // used; the returned arguments are not inspected.
2701 auto args = verifyAsyncResponse(http_response, rcode);
2702 } catch (const std::exception& ex) {
2703 error_message = ex.what();
2705 .arg(config_->getThisServerName())
2706 .arg(config->getLogLabel())
2707 .arg(ex.what());
2708 }
2709 }
2710
2711 post_request_action(error_message.empty(), error_message, rcode);
2712 });
2713}
2714
// Sends the HA reset command to the failover peer and blocks until the
// response has been handled.
//
// A private IOService and HttpClient are created, asyncSendHAReset() is
// started, and the IOService is run until the completion callback stops it.
// The callback captures locals by reference ([&]); this relies on run()
// executing inside this function.
//
// Returns the success flag reported by the completion callback.
// NOTE(review): listing line 2716 is absent from this extract; it presumably
// carried the function header - confirm against the original file.
2715bool
2717 IOServicePtr io_service(new IOService());
2718 HttpClient client(io_service, false);
2719 auto remote_config = config_->getFailoverPeerConfig();
2720 bool reset_successful = true;
2721
2722 asyncSendHAReset(client, remote_config,
2723 [&](const bool success, const std::string&, const int) {
2724 io_service->stop();
2725 reset_successful = success;
2726 });
2727
2728 // Run the IO service until it is stopped by the callback. This makes it synchronous.
2729 io_service->run();
2730
     // Shut the private client down and drain the private IO service before
     // both go out of scope.
2731 client.stop();
2732
2733 io_service->stopAndPoll();
2734
2735 return (reset_successful);
2736}
2737
// Handles a request to change the HA scopes served by this server.
//
// The list of scope names is passed to query_filter_.serveScopes(). Any
// exception raised in the try block is converted into an error answer
// carrying the exception text; otherwise a success answer is returned.
// NOTE(review): listing lines 2738 and 2742 are absent from this extract.
// Line 2738 presumably carried the return type; line 2742 is a statement
// inside the try block that is not visible here - confirm against the
// original file.
2739HAService::processScopes(const std::vector<std::string>& scopes) {
2740 try {
2741 query_filter_.serveScopes(scopes);
2743
2744 } catch (const std::exception& ex) {
2745 return (createAnswer(CONTROL_RESULT_ERROR, ex.what()));
2746 }
2747
2748 return (createAnswer(CONTROL_RESULT_SUCCESS, "New HA scopes configured."));
2749}
2750
// Handles a request to resume the HA state machine.
//
// A success answer is returned in both cases; only the text differs depending
// on whether unpause() reported that the state machine had been paused.
// NOTE(review): listing lines 2751-2752 are absent from this extract; they
// presumably carried the return type and function header (the name used in
// this block's tag is inferred from the answer texts) - confirm against the
// original file.
2753 if (unpause()) {
2754 return (createAnswer(CONTROL_RESULT_SUCCESS, "HA state machine continues."));
2755 }
2756 return (createAnswer(CONTROL_RESULT_SUCCESS, "HA state machine is not paused."));
2757}
2758
// Handles the ha-maintenance-notify command received from the partner.
//
// cancel == true: the partner asks this server to leave maintenance. The
// partner state is recorded from `state` (falling back to "unavailable" when
// setPartnerState() throws). The success answer carries the label of this
// server's current state in its "state" argument. An error answer is returned
// from a guarded branch whose condition is not visible (see notes).
//
// cancel == false: the partner asks this server to enter maintenance. For
// HA_BACKUP_ST, HA_TERMINATED_ST (and possibly a state on a missing line) an
// answer explaining that the transition is not possible is returned;
// otherwise the success answer "Server is in-maintenance state." is returned.
// NOTE(review): listing lines 2759, 2762, 2776, 2781-2782, 2794, 2802 and
// 2807-2808 are absent from this extract. They presumably carried the return
// type, the guard for the cancel error, the state transition statements, a
// further case label, the start of the createAnswer() call with its result
// code, and the transition performed in the default case - confirm against
// the original file.
2760HAService::processMaintenanceNotify(const bool cancel, const std::string& state) {
2761 if (cancel) {
     // NOTE(review): the condition guarding this early return (listing line
     // 2762) is not visible; per the message it is taken when the server is
     // not in the in-maintenance state.
2763 return (createAnswer(CONTROL_RESULT_ERROR, "Unable to cancel the"
2764 " maintenance for the server not in the"
2765 " in-maintenance state."));
2766 }
2767
2768 try {
2769 communication_state_->setPartnerState(state);
2770
2771 } catch (...) {
2772 // Hopefully the received state is correct. If it isn't, let's set the
2773 // partner state to unavailable and count on the state machine to resolve.
2774 communication_state_->setPartnerUnavailable();
2775 }
2777 // In rare cases the previous state may be the server's current state. Transitioning
2778 // to it would cause a deadlock and the server will remain stuck in maintenance.
2779 // In these cases let's simply transition to the waiting state and the state machine
2780 // should solve it.
2783
2784 // Communicate the new state to the partner.
2785 ElementPtr arguments = Element::createMap();
2786 std::string state_label = getState(getCurrState())->getLabel();
2787 arguments->set("state", Element::create(state_label));
2788
2789 return (createAnswer(CONTROL_RESULT_SUCCESS, "Server maintenance canceled.", arguments));
2790 }
2791
2792 switch (getCurrState()) {
2793 case HA_BACKUP_ST:
2795 case HA_TERMINATED_ST:
2796 // The reason why we don't return an error result here is that we have to
2797 // have a way to distinguish between the errors caused by the communication
2798 // issues and the cases when there is no communication error but the server
2799 // is not allowed to enter the in-maintenance state. In the former case, the
2800 // partner would go to partner-down. In the case signaled by the special
2801 // result code entering the maintenance state is not allowed.
2803 "Unable to transition the server from the "
2804 + stateToString(getCurrState()) + " to"
2805 " in-maintenance state."));
2806 default:
2809 }
2810 return (createAnswer(CONTROL_RESULT_SUCCESS, "Server is in-maintenance state."));
2811}
2812
// Handles a request to put the partner into maintenance.
//
// The request is rejected with an error answer when this server is in
// HA_BACKUP_ST or HA_TERMINATED_ST (further case labels sit on missing
// lines). Otherwise ha-maintenance-notify with the cancel flag set to false
// is sent to the failover peer on a private IOService/HttpClient, and the
// function blocks until the response handler has run. The outcome then
// decides the local transition:
//  - IO error, or the partner answered CONTROL_RESULT_ERROR: transition to
//    HA_PARTNER_DOWN_ST;
//  - the partner answered CONTROL_RESULT_SUCCESS: transition to
//    HA_PARTNER_IN_MAINTENANCE_ST;
//  - any other result code: no transition, an error answer including the
//    partner's message is returned.
//
// The response handler captures locals by reference; this relies on
// io_service->run() executing inside this function.
// NOTE(review): listing lines 2813-2814, 2817-2818, 2832, 2886, 2902, 2922
// and 2944 are absent from this extract. They presumably carried the return
// type and function header (the name used in this block's tag is inferred
// from the command and messages), additional case labels, the request
// constructor arguments, a logging macro, the request timeout argument and
// the opening of two "return (createAnswer(" statements - confirm against
// the original file.
2815 switch (getCurrState()) {
2816 case HA_BACKUP_ST:
2819 case HA_TERMINATED_ST:
2820 return (createAnswer(CONTROL_RESULT_ERROR, "Unable to transition the server from"
2821 " the " + stateToString(getCurrState()) + " to"
2822 " partner-in-maintenance state."));
2823 default:
2824 ;
2825 }
2826
2827 HAConfig::PeerConfigPtr remote_config = config_->getFailoverPeerConfig();
2828
2829 // Create HTTP/1.1 request including ha-maintenance-notify command
2830 // with the cancel flag set to false.
2831 PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
2833 HostHttpHeader(remote_config->getUrl().getStrippedHostname()));
2834 remote_config->addBasicAuthHttpHeader(request);
2835 request->setBodyAsJson(CommandCreator::createMaintenanceNotify(config_->getThisServerName(),
2836 false, getCurrState(), server_type_));
2837 request->finalize();
2838
2839 // Response object should also be created because the HTTP client needs
2840 // to know the type of the expected response.
2841 HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
2842
2843 IOServicePtr io_service(new IOService());
2844 HttpClient client(io_service, false);
2845
     // Results handed from the response handler back to this function.
2846 boost::system::error_code captured_ec;
2847 std::string captured_error_message;
2848 int captured_rcode = 0;
2849
2850 // Schedule asynchronous HTTP request.
2851 client.asyncSendRequest(remote_config->getUrl(),
2852 remote_config->getTlsContext(),
2853 request, response,
2854 [this, remote_config, &io_service, &captured_ec, &captured_error_message,
2855 &captured_rcode]
2856 (const boost::system::error_code& ec,
2857 const HttpResponsePtr& http_response,
2858 const std::string& error_str) {
2859
     // Stop the IO service so that run() below returns once this handler
     // has finished.
2860 io_service->stop();
2861
2862 // There are three possible groups of errors. One is the IO error
2863 // causing issues in communication with the peer. Another one is
2864 // an HTTP parsing error. The last type of error is when non-success
2865 // error code is returned in the response carried in the HTTP message
2866 // or if the JSON response is otherwise broken.
2867
2868 std::string error_message;
2869
2870 // Handle first two groups of errors.
2871 if (ec || !error_str.empty()) {
2872 error_message = (ec ? ec.message() : error_str);
2873 LOG_ERROR(ha_logger, HA_MAINTENANCE_NOTIFY_COMMUNICATIONS_FAILED)
2874 .arg(config_->getThisServerName())
2875 .arg(remote_config->getLogLabel())
2876 .arg(error_message);
2877
2878 } else {
2879
2880 // Handle third group of errors.
2881 try {
2882 static_cast<void>(verifyAsyncResponse(http_response, captured_rcode));
2883
2884 } catch (const std::exception& ex) {
2885 error_message = ex.what();
2887 .arg(config_->getThisServerName())
2888 .arg(remote_config->getLogLabel())
2889 .arg(error_message);
2890 }
2891 }
2892
2893 // If there was an error communicating with the partner, mark the
2894 // partner as unavailable.
2895 if (!error_message.empty()) {
2896 communication_state_->setPartnerUnavailable();
2897 }
2898
2899 captured_ec = ec;
2900 captured_error_message = error_message;
2901 },
2903 std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
2904 std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
2905 std::bind(&HAService::clientCloseHandler, this, ph::_1)
2906 );
2907
2908 // Run the IO service until it is stopped by any of the callbacks. This
2909 // makes it synchronous.
2910 io_service->run();
2911
2912 client.stop();
2913
2914 io_service->stopAndPoll();
2915
2916 // If there was a communication problem with the partner we assume that
2917 // the partner is already down while we receive this command.
2918 if (captured_ec || (captured_rcode == CONTROL_RESULT_ERROR)) {
2919 postNextEvent(HA_MAINTENANCE_START_EVT);
2920 verboseTransition(HA_PARTNER_DOWN_ST);
2921 runModel(NOP_EVT);
2923 "Server is now in the partner-down state as its"
2924 " partner appears to be offline for maintenance."));
2925
2926 } else if (captured_rcode == CONTROL_RESULT_SUCCESS) {
2927 // If the partner responded indicating no error it means that the
2928 // partner has been transitioned to the in-maintenance state. In that
2929 // case we transition to the partner-in-maintenance state.
2930 postNextEvent(HA_MAINTENANCE_START_EVT);
2931 verboseTransition(HA_PARTNER_IN_MAINTENANCE_ST);
2932 runModel(NOP_EVT);
2933
2934 } else {
2935 // Partner server returned a special status code which means that it can't
2936 // transition to the partner-in-maintenance state.
2937 return (createAnswer(CONTROL_RESULT_ERROR, "Unable to transition to the"
2938 " partner-in-maintenance state. The partner server responded"
2939 " with the following message to the ha-maintenance-notify"
2940 " command: " + captured_error_message + "."));
2941
2942 }
2943
2945 "Server is now in the partner-in-maintenance state"
2946 " and its partner is in-maintenance state. The partner"
2947 " can be now safely shut down."));
2948}
2949
// Handles a request to cancel the partner's maintenance.
//
// An error answer is returned from a guarded branch whose condition is not
// visible; per the message it applies when this server is not in the
// partner-in-maintenance state. Otherwise ha-maintenance-notify with the
// cancel flag set to true and the state held in next_state is sent to the
// failover peer on a private IOService/HttpClient, and the function blocks
// until the response handler has run.
//
// In the response handler the partner is first marked unavailable and then,
// if the response carries a string "state" argument, its state is set from
// that value. Any communication or verification error leaves the partner
// marked unavailable and makes the function return an answer containing the
// error text. On success this server transitions to next_state.
//
// The response handler captures locals by reference; this relies on
// io_service->run() executing inside this function.
// NOTE(review): listing lines 2950-2952, 2960, 2967, 3031, 3044, 3061 and
// 3074 are absent from this extract. They presumably carried the return type,
// function header and state guard (the name used in this block's tag is
// inferred from the messages), the declaration of next_state, the request
// constructor arguments, a logging macro, the request timeout argument and
// the opening of two "return (createAnswer(" statements - confirm against
// the original file.
2953 return (createAnswer(CONTROL_RESULT_ERROR, "Unable to cancel maintenance"
2954 " request because the server is not in the"
2955 " partner-in-maintenance state."));
2956 }
2957
2958 // This is the state the server will transition to if the notification to the
2959 // partner is successful.
2961
2962 HAConfig::PeerConfigPtr remote_config = config_->getFailoverPeerConfig();
2963
2964 // Create HTTP/1.1 request including ha-maintenance-notify command
2965 // with the cancel flag set to true.
2966 PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
2968 HostHttpHeader(remote_config->getUrl().getStrippedHostname()));
2969 remote_config->addBasicAuthHttpHeader(request);
2970 request->setBodyAsJson(CommandCreator::createMaintenanceNotify(config_->getThisServerName(),
2971 true,
2972 next_state,
2973 server_type_));
2974 request->finalize();
2975
2976 // Response object should also be created because the HTTP client needs
2977 // to know the type of the expected response.
2978 HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
2979
2980 IOServicePtr io_service(new IOService());
2981 HttpClient client(io_service, false);
2982
     // Filled in by the response handler; empty means success.
2983 std::string error_message;
2984
2985 // Schedule asynchronous HTTP request.
2986 client.asyncSendRequest(remote_config->getUrl(),
2987 remote_config->getTlsContext(),
2988 request, response,
2989 [this, remote_config, &io_service, &error_message]
2990 (const boost::system::error_code& ec,
2991 const HttpResponsePtr& http_response,
2992 const std::string& error_str) {
2993
     // Stop the IO service so that run() below returns once this handler
     // has finished.
2994 io_service->stop();
2995
2996 // Handle first two groups of errors.
2997 if (ec || !error_str.empty()) {
2998 error_message = (ec ? ec.message() : error_str);
2999 LOG_ERROR(ha_logger, HA_MAINTENANCE_NOTIFY_CANCEL_COMMUNICATIONS_FAILED)
3000 .arg(config_->getThisServerName())
3001 .arg(remote_config->getLogLabel())
3002 .arg(error_message);
3003
3004 } else {
3005
3006 // Handle third group of errors.
3007 try {
3008 int rcode = 0;
3009 ConstElementPtr args = verifyAsyncResponse(http_response, rcode);
3010
3011 // Partner's state has changed after the notification. However, we don't know
3012 // its new state. We'll check if the partner returned its state. If it didn't,
3013 // we set the unavailable state as a default.
3014 communication_state_->setPartnerUnavailable();
3015
3016 // Newer Kea versions return the state of the notified server.
3017 // Older versions don't, so the arguments may not be present.
3018 if (args && args->getType() == Element::map) {
3019 // Arguments may include partner's state.
3020 ConstElementPtr state = args->get("state");
3021 if (state) {
3022 if (state->getType() != Element::string) {
     // NOTE(review): the message below names the ha-heartbeat command although
     // this handler processes the ha-maintenance-notify response; it looks
     // copied from the heartbeat handler - confirm and correct the text.
3023 isc_throw(CtrlChannelError, "server state not returned in response"
3024 " to a ha-heartbeat command or it is not a string");
3025 }
3026 communication_state_->setPartnerState(state->stringValue());
3027 }
3028 }
3029 } catch (const std::exception& ex) {
3030 error_message = ex.what();
3032 .arg(config_->getThisServerName())
3033 .arg(remote_config->getLogLabel())
3034 .arg(error_message);
3035 }
3036 }
3037
3038 // If there was an error communicating with the partner, mark the
3039 // partner as unavailable.
3040 if (!error_message.empty()) {
3041 communication_state_->setPartnerUnavailable();
3042 }
3043 },
3045 std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
3046 std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
3047 std::bind(&HAService::clientCloseHandler, this, ph::_1)
3048 );
3049
3050 // Run the IO service until it is stopped by any of the callbacks. This
3051 // makes it synchronous.
3052 io_service->run();
3053
3054 client.stop();
3055
3056 io_service->stopAndPoll();
3057
3058 // There was an error in communication with the partner or the
3059 // partner was unable to revert its state.
3060 if (!error_message.empty()) {
3062 "Unable to cancel maintenance. The partner server responded"
3063 " with the following message to the ha-maintenance-notify"
3064 " command: " + error_message + "."));
3065 }
3066
3067 // Successfully reverted partner's state. Let's also revert our state to the
3068 // previous one. Avoid returning to the partner-in-maintenance if it was
3069 // the previous state.
3070 postNextEvent(HA_MAINTENANCE_CANCEL_EVT);
3071 verboseTransition(next_state);
3072 runModel(NOP_EVT);
3073
3075 "Server maintenance successfully canceled."));
3076}
3077
// Schedules an asynchronous "ha-sync-complete-notify" command to the server
// described by remote_config, using the supplied HTTP client.
//
// The request body is the command built by
// CommandCreator::createSyncCompleteNotify from getRemoteOrigin(), this
// server's name and server_type_. Basic authentication is added through
// remote_config->addBasicAuthHttpHeader().
//
// The completion lambda derives an error message from, in order: the IO error
// code, the HTTP error string, or an exception thrown by verifyAsyncResponse().
// A non-empty error message marks the partner as unavailable. Finally, if
// post_request_action is set, it is invoked with (success flag, error message,
// rcode).
//
// NOTE(review): the embedded listing numbers skip 3079, 3084, 3130, 3134 and
// 3154, so the line naming the function, part of the request constructor
// arguments, the body of the CommandUnsupportedError handler, the head of one
// log statement and one asyncSendRequest argument are not visible in this
// extract.
3078void
3080 const HAConfig::PeerConfigPtr& remote_config,
3081 PostRequestCallback post_request_action) {
3082 // Create HTTP/1.1 request including our command.
3083 PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
3085 HostHttpHeader(remote_config->getUrl().getStrippedHostname()));
3086
3087 remote_config->addBasicAuthHttpHeader(request);
3088 request->setBodyAsJson(CommandCreator::createSyncCompleteNotify(getRemoteOrigin(),
3089 config_->getThisServerName(),
3090 server_type_));
3091 request->finalize();
3092
3093 // Response object should also be created because the HTTP client needs
3094 // to know the type of the expected response.
3095 HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
3096
3097 // Schedule asynchronous HTTP request.
3098 http_client.asyncSendRequest(remote_config->getUrl(),
3099 remote_config->getTlsContext(),
3100 request, response,
3101 [this, remote_config, post_request_action]
3102 (const boost::system::error_code& ec,
3103 const HttpResponsePtr& http_response,
3104 const std::string& error_str) {
3105
3106 // There are three possible groups of errors. One is the IO error
3107 // causing issues in communication with the peer. Another one is an
3108 // HTTP parsing error. The last type of error is when non-success
3109 // error code is returned in the response carried in the HTTP message
3110 // or if the JSON response is otherwise broken.
3111
3112 int rcode = 0;
3113 std::string error_message;
3114
3115 // Handle first two groups of errors.
3116 if (ec || !error_str.empty()) {
3117 error_message = (ec ? ec.message() : error_str);
3118 LOG_ERROR(ha_logger, HA_SYNC_COMPLETE_NOTIFY_COMMUNICATIONS_FAILED)
3119 .arg(config_->getThisServerName())
3120 .arg(remote_config->getLogLabel())
3121 .arg(error_message);
3122
3123 } else {
3124
3125 // Handle third group of errors.
3126 try {
 // Only rcode and a possible exception matter here; the returned
 // arguments are deliberately discarded.
3127 static_cast<void>(verifyAsyncResponse(http_response, rcode));
3128
3129 } catch (const CommandUnsupportedError& ex) {
3131
3132 } catch (const std::exception& ex) {
3133 error_message = ex.what();
3135 .arg(config_->getThisServerName())
3136 .arg(remote_config->getLogLabel())
3137 .arg(error_message);
3138 }
3139 }
3140
3141 // If there was an error communicating with the partner, mark the
3142 // partner as unavailable.
3143 if (!error_message.empty()) {
3144 communication_state_->setPartnerUnavailable();
3145 }
3146
3147 // Invoke post request action if it was specified.
3148 if (post_request_action) {
3149 post_request_action(error_message.empty(),
3150 error_message,
3151 rcode);
3152 }
3153 },
3155 std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
3156 std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
3157 std::bind(&HAService::clientCloseHandler, this, ph::_1)
3158 );
3159}
3160
// Processes the ha-sync-complete-notify command for the given origin and
// returns a response.
//
// Inside a conditional block the local network service is disabled under the
// local origin. The service is then always enabled for origin_id, i.e. the
// lock held for the remote origin is released. The answer carries the text
// "Server successfully notified about the synchronization completion.".
//
// NOTE(review): the embedded listing numbers skip 3161, 3163-3164 and 3178, so
// the return type line, the opening of the conditional block (per the comment
// below it concerns the partner-down state) and the start of the return
// statement are not visible in this extract.
3162HAService::processSyncCompleteNotify(const unsigned int origin_id) {
3165 // We're in the partner-down state and the partner notified us
3166 // that it has synchronized its database. We can't enable the
3167 // service yet, because it may result in some new lease allocations
3168 // that the partner would miss (we don't send lease updates in the
3169 // partner-down state). We must first send the heartbeat and let
3170 // the state machine resolve the situation between the partners.
3171 // It may unblock the network service.
3172 network_state_->disableService(getLocalOrigin());
3173 }
3174 // Release the network state lock for the remote origin because we have
3175 // acquired the local network state lock above (partner-down state), or
3176 // we don't need the lock (other states).
3177 network_state_->enableService(origin_id);
3179 "Server successfully notified about the synchronization completion."));
3180}
3181
3184 // Set the return code to error in case of early throw.
3185 rcode = CONTROL_RESULT_ERROR;
3186 // The response must cast to JSON type.
3187 HttpResponseJsonPtr json_response =
3188 boost::dynamic_pointer_cast<HttpResponseJson>(response);
3189 if (!json_response) {
3190 isc_throw(CtrlChannelError, "no valid HTTP response found");
3191 }
3192
3193 // Body holds the response to our command.
3194 ConstElementPtr body = json_response->getBodyAsJson();
3195 if (!body) {
3196 isc_throw(CtrlChannelError, "no body found in the response");
3197 }
3198
3199 // Body should contain a list of responses from multiple servers.
3200 if (body->getType() != Element::list) {
3201 // Some control socket errors are returned as a map.
3202 if (body->getType() == Element::map) {
3204 ElementPtr answer = Element::createMap();
3205 answer->set(CONTROL_RESULT, Element::create(rcode));
3206 ConstElementPtr text = body->get(CONTROL_TEXT);
3207 if (text) {
3208 answer->set(CONTROL_TEXT, text);
3209 }
3210 list->add(answer);
3211 body = list;
3212 } else {
3213 isc_throw(CtrlChannelError, "body of the response must be a list");
3214 }
3215 }
3216
3217 // There must be at least one response.
3218 if (body->empty()) {
3219 isc_throw(CtrlChannelError, "list of responses must not be empty");
3220 }
3221
3222 // Check if the status code of the first response. We don't support multiple
3223 // at this time, because we always send a request to a single location.
3224 ConstElementPtr args = parseAnswer(rcode, body->get(0));
3225 if (rcode == CONTROL_RESULT_SUCCESS) {
3226 return (args);
3227 }
3228
3229 std::ostringstream s;
3230
3231 // The empty status can occur for the lease6-bulk-apply command. In that
3232 // case, the response may contain conflicted or erred leases within the
3233 // arguments, rather than globally. For other error cases let's construct
3234 // the error message from the global values.
3235 if (rcode != CONTROL_RESULT_EMPTY) {
3236 // Include an error text if available.
3237 if (args && args->getType() == Element::string) {
3238 s << args->stringValue() << " (";
3239 }
3240 // Include an error code.
3241 s << "error code " << rcode << ")";
3242 }
3243
3244 switch (rcode) {
3246 isc_throw(CommandUnsupportedError, s.str());
3247
3249 isc_throw(ConflictError, s.str());
3250
3252 // Handle the lease6-bulk-apply error cases.
3253 if (args && (args->getType() == Element::map)) {
3254 auto failed_leases = args->get("failed-leases");
3255 if (!failed_leases || (failed_leases->getType() != Element::list)) {
3256 // If there are no failed leases there is nothing to do.
3257 break;
3258 }
3259 auto conflict = false;
3260 ConstElementPtr conflict_error_message;
3261 for (unsigned i = 0; i < failed_leases->size(); ++i) {
3262 auto lease = failed_leases->get(i);
3263 if (!lease || lease->getType() != Element::map) {
3264 continue;
3265 }
3266 auto result = lease->get("result");
3267 if (!result || result->getType() != Element::integer) {
3268 continue;
3269 }
3270 auto error_message = lease->get("error-message");
3271 // Error status code takes precedence over the conflict.
3272 if (result->intValue() == CONTROL_RESULT_ERROR) {
3273 if (error_message && error_message->getType()) {
3274 s << error_message->stringValue() << " (";
3275 }
3276 s << "error code " << result->intValue() << ")";
3277 isc_throw(CtrlChannelError, s.str());
3278 }
3279 if (result->intValue() == CONTROL_RESULT_CONFLICT) {
3280 // Let's record the conflict but there may still be some
3281 // leases with an error status code, so do not throw the
3282 // conflict exception yet.
3283 conflict = true;
3284 conflict_error_message = error_message;
3285 }
3286 }
3287 if (conflict) {
3288 // There are no errors. There are only conflicts. Throw
3289 // appropriate exception.
3290 if (conflict_error_message &&
3291 (conflict_error_message->getType() == Element::string)) {
3292 s << conflict_error_message->stringValue() << " (";
3293 }
3294 s << "error code " << CONTROL_RESULT_CONFLICT << ")";
3295 isc_throw(ConflictError, s.str());
3296 }
3297 }
3298 break;
3299 default:
3300 isc_throw(CtrlChannelError, s.str());
3301 }
3302 return (args);
3303}
3304
3305bool
3306HAService::clientConnectHandler(const boost::system::error_code& ec, int tcp_native_fd) {
3307
3308 // If client is running it's own IOService we do NOT want to
3309 // register the socket with IfaceMgr.
3310 if (client_->getThreadIOService()) {
3311 return (true);
3312 }
3313
3314 // If things look OK register the socket with Interface Manager. Note
3315 // we don't register if the FD is < 0 to avoid an exception throw.
3316 // It is unlikely that this will occur but we want to be liberal
3317 // and avoid issues.
3318 if ((!ec || (ec.value() == boost::asio::error::in_progress))
3319 && (tcp_native_fd >= 0)) {
3320 // External socket callback is a NOP. Ready events handlers are
3321 // run by an explicit call IOService ready in kea-dhcp<n> code.
3322 // We are registering the socket only to interrupt main-thread
3323 // select().
3324 IfaceMgr::instance().addExternalSocket(tcp_native_fd,
3325 std::bind(&HAService::socketReadyHandler, this, ph::_1)
3326 );
3327 }
3328
3329 // If ec.value() == boost::asio::error::already_connected, we should already
3330 // be registered, so nothing to do. If it is any other value, then connect
3331 // failed and Connection logic should handle that, not us, so no matter
3332 // what happens we're returning true.
3333 return (true);
3334}
3335
// External socket "ready" callback: forwards the descriptor to
// client_->closeIfOutOfBand().
//
// NOTE(review): the embedded listing numbers skip 3337, so the line naming the
// function and its parameter is not visible in this extract; the body matches
// the handler bound as &HAService::socketReadyHandler elsewhere in this file
// and uses a parameter called tcp_native_fd.
3336void
3338 // If the socket is ready but does not belong to one of our client's
3339 // ongoing transactions, we close it. This will unregister it from
3340 // IfaceMgr and ensure the client starts over with a fresh connection
3341 // if it needs to do so.
3342 client_->closeIfOutOfBand(tcp_native_fd);
3343}
3344
// Handler acting on a socket descriptor only when it is non-negative and
// IfaceMgr reports it as a registered external socket.
//
// NOTE(review): the embedded listing numbers skip 3346 and 3349, so the line
// naming the function and the statement executed inside the conditional are
// not visible in this extract. Presumably this is the close handler bound as
// &HAService::clientCloseHandler elsewhere in this file - confirm against the
// full source.
3345void
3347 if ((tcp_native_fd >= 0) &&
3348 IfaceMgr::instance().isExternalSocket(tcp_native_fd)) {
3350 }
3351}
3352
// Returns the number of entries in pending_requests_.
//
// mutex_ is held while reading the size only when
// MultiThreadingMgr::instance().getMode() is true; otherwise the size is read
// without locking.
//
// NOTE(review): the embedded listing numbers skip 3354, so the line naming the
// function is not visible in this extract.
3353size_t
3355 if (MultiThreadingMgr::instance().getMode()) {
3356 std::lock_guard<std::mutex> lock(mutex_);
3357 return (pending_requests_.size());
3358 } else {
3359 return (pending_requests_.size());
3360 }
3361}
3362
3363template<typename QueryPtrType>
3364int
3365HAService::getPendingRequest(const QueryPtrType& query) {
3366 if (MultiThreadingMgr::instance().getMode()) {
3367 std::lock_guard<std::mutex> lock(mutex_);
3368 return (getPendingRequestInternal(query));
3369 } else {
3370 return (getPendingRequestInternal(query));
3371 }
3372}
3373
3374template<typename QueryPtrType>
3375int
3376HAService::getPendingRequestInternal(const QueryPtrType& query) {
3377 if (pending_requests_.count(query) == 0) {
3378 return (0);
3379 } else {
3380 return (pending_requests_[query]);
3381 }
3382}
3383
// Calls checkPermissions() on client_ and on listener_, each only when set.
//
// isc::MultiThreadingInvalidOperation is logged and rethrown; any other
// std::exception is logged and suppressed.
//
// NOTE(review): the embedded listing numbers skip 3385, 3400 and 3407, so the
// line naming the function and the heads of both log statements are not
// visible in this extract.
3384void
3386 // Since this function is used as CS callback all exceptions must be
3387 // suppressed (except the @ref MultiThreadingInvalidOperation), unlikely
3388 // though they may be.
3389 // The @ref MultiThreadingInvalidOperation is propagated to the scope of the
3390 // @ref MultiThreadingCriticalSection constructor.
3391 try {
3392 if (client_) {
3393 client_->checkPermissions();
3394 }
3395
3396 if (listener_) {
3397 listener_->checkPermissions();
3398 }
3399 } catch (const isc::MultiThreadingInvalidOperation& ex) {
3401 .arg(config_->getThisServerName())
3402 .arg(ex.what());
3403 // The exception needs to be propagated to the caller of the
3404 // @ref MultiThreadingCriticalSection constructor.
3405 throw;
3406 } catch (const std::exception& ex) {
3408 .arg(config_->getThisServerName())
3409 .arg(ex.what());
3410 }
3411}
3412
// Registers critical section callbacks and then starts client_ and
// listener_, each only when set.
//
// The visible callback arguments bind pauseClientAndListener and
// resumeClientAndListener to this instance.
//
// NOTE(review): the embedded listing numbers skip 3414 and 3416-3417, so the
// line naming the function and the start of the callback registration call
// (including any arguments preceding the two shown) are not visible in this
// extract.
3413void
3415 // Add critical section callbacks.
3418 std::bind(&HAService::pauseClientAndListener, this),
3419 std::bind(&HAService::resumeClientAndListener, this));
3420
3421 if (client_) {
3422 client_->start();
3423 }
3424
3425 if (listener_) {
3426 listener_->start();
3427 }
3428}
3429
// Calls pause() on client_ and on listener_, each only when set. Any
// std::exception is caught, logged and not propagated.
//
// NOTE(review): the embedded listing numbers skip 3431 and 3443, so the line
// naming the function and the head of the log statement are not visible in
// this extract.
3430void
3432 // Since this function is used as CS callback all exceptions must be
3433 // suppressed, unlikely though they may be.
3434 try {
3435 if (client_) {
3436 client_->pause();
3437 }
3438
3439 if (listener_) {
3440 listener_->pause();
3441 }
3442 } catch (const std::exception& ex) {
3444 .arg(config_->getThisServerName())
3445 .arg(ex.what());
3446 }
3447}
3448
// Calls resume() on client_ and on listener_, each only when set. Any
// std::exception is caught, logged and not propagated.
//
// NOTE(review): the embedded listing numbers skip 3450 and 3462, so the line
// naming the function and the head of the log statement are not visible in
// this extract.
// NOTE(review): the handler below catches std::exception by non-const
// reference, whereas the other handlers in this file catch by const
// reference; consider aligning it.
3449void
3451 // Since this function is used as CS callback all exceptions must be
3452 // suppressed, unlikely though they may be.
3453 try {
3454 if (client_) {
3455 client_->resume();
3456 }
3457
3458 if (listener_) {
3459 listener_->resume();
3460 }
3461 } catch (std::exception& ex) {
3463 .arg(config_->getThisServerName())
3464 .arg(ex.what());
3465 }
3466}
3467
// Stops client_ and listener_, each only when set. Per the comment below, the
// critical section callbacks are removed first.
//
// NOTE(review): the embedded listing numbers skip 3469 and 3471, so the line
// naming the function and the statement removing the callbacks are not
// visible in this extract.
3468void
3470 // Remove critical section callbacks.
3472
3473 if (client_) {
3474 client_->stop();
3475 }
3476
3477 if (listener_) {
3478 listener_->stop();
3479 }
3480}
3481
3482// Explicit instantiations.
// The getPendingRequest member template is defined in this file, so it is
// instantiated here for the Pkt4Ptr and Pkt6Ptr query pointer types.
3483template int HAService::getPendingRequest(const Pkt4Ptr&);
3484template int HAService::getPendingRequest(const Pkt6Ptr&);
3485
3486} // end of namespace isc::ha
3487} // end of namespace isc
static ElementPtr create(const Position &pos=ZERO_POSITION())
Create a NullElement.
Definition data.cc:299
@ map
Definition data.h:160
@ integer
Definition data.h:153
@ list
Definition data.h:159
@ string
Definition data.h:157
static ElementPtr createMap(const Position &pos=ZERO_POSITION())
Creates an empty MapElement type ElementPtr.
Definition data.cc:354
static ElementPtr createList(const Position &pos=ZERO_POSITION())
Creates an empty ListElement type ElementPtr.
Definition data.cc:349
virtual const char * what() const
Returns a C-style character string of the cause of the exception.
Exception thrown when a worker thread is trying to stop or pause the respective thread pool (which wo...
A generic exception that is thrown when an unexpected error condition occurs.
A multi-threaded HTTP listener that can process API commands requests.
static std::unordered_set< std::string > command_accept_list_
The server command accept list.
A standard control channel exception that is thrown if there is a problem with one of t...
void deleteExternalSocket(int socketfd)
Deletes external socket.
Definition iface_mgr.cc:398
static IfaceMgr & instance()
IfaceMgr is a singleton class.
Definition iface_mgr.cc:52
void addExternalSocket(int socketfd, SocketCallback callback)
Adds external socket and a callback.
Definition iface_mgr.cc:367
static data::ConstElementPtr createLease4Delete(const dhcp::Lease4 &lease4)
Creates lease4-del command.
static data::ConstElementPtr createHeartbeat(const std::string &server_name, const HAServerType &server_type)
Creates ha-heartbeat command for DHCP server.
static std::unordered_set< std::string > ha_commands4_
List of commands used by the High Availability in v4.
static data::ConstElementPtr createLease4Update(const dhcp::Lease4 &lease4)
Creates lease4-update command.
static data::ConstElementPtr createSyncCompleteNotify(const unsigned int origin_id, const std::string &server_name, const HAServerType &server_type)
Creates ha-sync-complete-notify command.
static data::ConstElementPtr createLease6BulkApply(const dhcp::Lease6CollectionPtr &leases, const dhcp::Lease6CollectionPtr &deleted_leases)
Creates lease6-bulk-apply command.
static data::ConstElementPtr createLease6GetPage(const dhcp::Lease6Ptr &lease6, const uint32_t limit)
Creates lease6-get-page command.
static data::ConstElementPtr createDHCPDisable(const unsigned int origin_id, const unsigned int max_period, const HAServerType &server_type)
Creates dhcp-disable command for DHCP server.
static data::ConstElementPtr createDHCPEnable(const unsigned int origin_id, const HAServerType &server_type)
Creates dhcp-enable command for DHCP server.
static data::ConstElementPtr createMaintenanceNotify(const std::string &server_name, const bool cancel, const int state, const HAServerType &server_type)
Creates ha-maintenance-notify command.
static std::unordered_set< std::string > ha_commands6_
List of commands used by the High Availability in v6.
static data::ConstElementPtr createHAReset(const std::string &server_name, const HAServerType &server_type)
Creates ha-reset command.
static data::ConstElementPtr createLease4GetPage(const dhcp::Lease4Ptr &lease4, const uint32_t limit)
Creates lease4-get-page command.
Holds communication state between DHCPv4 servers.
Holds communication state between DHCPv6 servers.
Role
Server's role in the High Availability setup.
Definition ha_config.h:83
static std::string roleToString(const HAConfig::PeerConfig::Role &role)
Returns role name.
Definition ha_config.cc:82
std::map< std::string, PeerConfigPtr > PeerConfigMap
Map of the servers' configurations.
Definition ha_config.h:245
static std::string HAModeToString(const HAMode &ha_mode)
Returns HA mode name.
Definition ha_config.cc:233
boost::shared_ptr< PeerConfig > PeerConfigPtr
Pointer to the server's configuration.
Definition ha_config.h:242
static const int HA_MAINTENANCE_START_EVT
ha-maintenance-start command received.
Definition ha_service.h:71
bool inScope(dhcp::Pkt4Ptr &query4)
Checks if the DHCPv4 query should be processed by this server.
void adjustNetworkState()
Enables or disables network state depending on the served scopes.
void stopClientAndListener()
Stop the client and(or) listener instances.
int getNormalState() const
Returns normal operation state for the current configuration.
bool shouldQueueLeaseUpdates(const HAConfig::PeerConfigPtr &peer_config) const
Checks if the lease updates should be queued.
static const int HA_HEARTBEAT_COMPLETE_EVT
Finished heartbeat command.
Definition ha_service.h:56
void asyncSendHAReset(http::HttpClient &http_client, const HAConfig::PeerConfigPtr &remote_config, PostRequestCallback post_request_action)
Sends ha-reset command to partner asynchronously.
bool clientConnectHandler(const boost::system::error_code &ec, int tcp_native_fd)
HttpClient connect callback handler.
void asyncSyncLeases()
Asynchronously reads leases from a peer and updates local lease database.
bool isMaintenanceCanceled() const
Convenience method checking if the current state is a result of canceling the maintenance.
data::ConstElementPtr processMaintenanceCancel()
Processes ha-maintenance-cancel command and returns a response.
void checkPermissionsClientAndListener()
Check client and(or) listener current thread permissions to perform thread pool state transition.
bool shouldReclaim(const dhcp::Lease4Ptr &lease4) const
Checks if the lease should be reclaimed by this server.
void asyncSendLeaseUpdate(const QueryPtrType &query, const HAConfig::PeerConfigPtr &config, const data::ConstElementPtr &command, const hooks::ParkingLotHandlePtr &parking_lot)
Asynchronously sends lease update to the peer.
void verboseTransition(const unsigned state)
Transitions to a desired state and logs it.
bool sendLeaseUpdatesFromBacklog()
Attempts to send all lease updates from the backlog synchronously.
config::CmdHttpListenerPtr listener_
HTTP listener instance used to receive and respond to HA commands and lease updates.
void clientCloseHandler(int tcp_native_fd)
HttpClient close callback handler.
bool leaseUpdateComplete(QueryPtrType &query, const hooks::ParkingLotHandlePtr &parking_lot)
Handle last pending request for this query.
HAConfigPtr config_
Pointer to the HA hooks library configuration.
data::ConstElementPtr processMaintenanceStart()
Processes ha-maintenance-start command and returns a response.
unsigned int id_
Unique service id.
HAServerType server_type_
DHCP server type.
bool sync_complete_notified_
An indicator that a partner sent ha-sync-complete-notify command.
bool shouldTerminate() const
Indicates if the server should transition to the terminated state.
data::ConstElementPtr processScopes(const std::vector< std::string > &scopes)
Processes ha-scopes command and returns a response.
dhcp::NetworkStatePtr network_state_
Pointer to the state of the DHCP service (enabled/disabled).
data::ConstElementPtr processSynchronize(const std::string &server_name, const unsigned int max_period)
Processes ha-sync command and returns a response.
void scheduleHeartbeat()
Schedules asynchronous heartbeat to a peer if it is not scheduled.
void asyncSyncCompleteNotify(http::HttpClient &http_client, const HAConfig::PeerConfigPtr &remote_config, PostRequestCallback post_request_action)
Schedules asynchronous "ha-sync-complete-notify" command to the specified server.
QueryFilter query_filter_
Selects queries to be processed/dropped.
static const int HA_MAINTENANCE_NOTIFY_EVT
ha-maintenance-notify command received.
Definition ha_service.h:68
static const int HA_SYNCED_PARTNER_UNAVAILABLE_EVT
The heartbeat command failed after receiving ha-sync-complete-notify command from the partner.
Definition ha_service.h:78
data::ConstElementPtr processMaintenanceNotify(const bool cancel, const std::string &state)
Processes ha-maintenance-notify command and returns a response.
void conditionalLogPausedState() const
Logs if the server is paused in the current state.
bool unpause()
Unpauses the HA state machine with logging.
static const int HA_CONTROL_RESULT_MAINTENANCE_NOT_ALLOWED
Control result returned in response to ha-maintenance-notify.
Definition ha_service.h:81
void serveDefaultScopes()
Instructs the HA service to serve default scopes.
size_t asyncSendLeaseUpdates(const dhcp::Pkt4Ptr &query, const dhcp::Lease4CollectionPtr &leases, const dhcp::Lease4CollectionPtr &deleted_leases, const hooks::ParkingLotHandlePtr &parking_lot)
Schedules asynchronous IPv4 leases updates.
size_t pendingRequestSize()
Get the number of entries in the pending request map.
static const int HA_SYNCING_SUCCEEDED_EVT
Lease database synchronization succeeded.
Definition ha_service.h:65
bool sendHAReset()
Sends ha-reset command to partner synchronously.
std::function< void(const bool, const std::string &, const int)> PostRequestCallback
Callback invoked when request was sent and a response received or an error occurred.
Definition ha_service.h:95
asiolink::IOServicePtr io_service_
Pointer to the IO service object shared between this hooks library and the DHCP server.
void localDisableDHCPService()
Disables local DHCP service.
CommunicationStatePtr communication_state_
Holds communication state with a peer.
void logFailedLeaseUpdates(const dhcp::PktPtr &query, const data::ConstElementPtr &args) const
Log failed lease updates.
LeaseUpdateBacklog lease_update_backlog_
Backlog of DHCP lease updates.
virtual ~HAService()
Destructor.
static const int HA_SYNCING_FAILED_EVT
Lease database synchronization failed.
Definition ha_service.h:62
static const int HA_MAINTENANCE_CANCEL_EVT
ha-maintenance-cancel command received.
Definition ha_service.h:74
void asyncSendLeaseUpdatesFromBacklog(http::HttpClient &http_client, const HAConfig::PeerConfigPtr &remote_config, PostRequestCallback post_request_action)
Sends lease updates from backlog to partner asynchronously.
data::ConstElementPtr processHeartbeat()
Processes ha-heartbeat command and returns a response.
void asyncSyncLeasesInternal(http::HttpClient &http_client, const HAConfig::PeerConfigPtr &remote_config, const unsigned int max_period, const dhcp::LeasePtr &last_lease, PostSyncCallback post_sync_action, const bool dhcp_disabled)
Implements fetching one page of leases during synchronization.
data::ConstElementPtr processHAReset()
Processes ha-reset command and returns a response.
size_t asyncSendSingleLeaseUpdate(const dhcp::Pkt4Ptr &query, const dhcp::Lease4Ptr &lease, const hooks::ParkingLotHandlePtr &parking_lot)
Schedules an asynchronous IPv4 lease update.
void asyncSendHeartbeat()
Starts asynchronous heartbeat to a peer.
bool isPartnerStateInvalid() const
Indicates if the partner's state is invalid.
void startClientAndListener()
Start the client and(or) listener instances.
data::ConstElementPtr verifyAsyncResponse(const http::HttpResponsePtr &response, int &rcode)
Checks if the response is valid or contains an error.
void resumeClientAndListener()
Resumes client and(or) listener thread pool operations.
data::ConstElementPtr processStatusGet() const
Processes status-get command and returns a response.
int getPendingRequest(const QueryPtrType &query)
Get the number of scheduled requests for a given query.
LeaseSyncFilter lease_sync_filter_
Lease synchronization filter used in hub-and-spoke model.
int synchronize(std::string &status_message, const HAConfig::PeerConfigPtr &remote_config, const unsigned int max_period)
Synchronizes lease database with a partner.
bool shouldSendLeaseUpdates(const HAConfig::PeerConfigPtr &peer_config) const
Checks if the lease updates should be sent as result of leases allocation or release.
void serveFailoverScopes()
Instructs the HA service to serve failover scopes.
void localEnableDHCPService()
Enables local DHCP service.
static const int HA_LEASE_UPDATES_COMPLETE_EVT
Finished lease updates commands.
Definition ha_service.h:59
HAService(const unsigned int id, const asiolink::IOServicePtr &io_service, const dhcp::NetworkStatePtr &network_state, const HAConfigPtr &config, const HAServerType &server_type=HAServerType::DHCPv4)
Constructor.
Definition ha_service.cc:76
void socketReadyHandler(int tcp_native_fd)
IfaceMgr external socket ready callback handler.
http::HttpClientPtr client_
HTTP client instance used to send HA commands and lease updates.
void updatePendingRequest(QueryPtrType &query)
Update pending request counter for this query.
bool shouldPartnerDown() const
Indicates if the server should transition to the partner down state.
void startHeartbeat()
Unconditionally starts one heartbeat to a peer.
data::ConstElementPtr processSyncCompleteNotify(const unsigned int origin_id)
Process ha-sync-complete-notify command and returns a response.
data::ConstElementPtr processContinue()
Processes ha-continue command and returns a response.
void asyncDisableDHCPService(http::HttpClient &http_client, const HAConfig::PeerConfigPtr &remote_config, const unsigned int max_period, PostRequestCallback post_request_action)
Schedules asynchronous "dhcp-disable" command to the specified server.
void pauseClientAndListener()
Pauses client and(or) listener thread pool operations.
std::function< void(const bool, const std::string &, const bool)> PostSyncCallback
Callback invoked when lease database synchronization is complete.
Definition ha_service.h:104
static const int HA_WAITING_TO_TERMINATED_ST_DELAY_MINUTES
A delay in minutes to transition from the waiting to terminated state when the partner remains in ter...
Definition ha_service.h:85
void asyncEnableDHCPService(http::HttpClient &http_client, const HAConfig::PeerConfigPtr &remote_config, PostRequestCallback post_request_action)
Schedules asynchronous "dhcp-enable" command to the specified server.
OpType
Type of the lease update (operation type).
bool inScope(const dhcp::Pkt4Ptr &query4, std::string &scope_class) const
Checks if this server should process the DHCPv4 query.
Represents HTTP Host header.
Definition http_header.h:68
HTTP client class.
void stop()
Halts client-side IO activity.
Definition client.cc:2036
void asyncSendRequest(const Url &url, const asiolink::TlsContextPtr &tls_context, const HttpRequestPtr &request, const HttpResponsePtr &response, const RequestHandler &request_callback, const RequestTimeout &request_timeout=RequestTimeout(10000), const ConnectHandler &connect_callback=ConnectHandler(), const HandshakeHandler &handshake_callback=HandshakeHandler(), const CloseHandler &close_callback=CloseHandler())
Queues new asynchronous HTTP request for a given URL.
Definition client.cc:1975
This class parses and generates time values used in HTTP.
Definition date_time.h:41
std::string rfc1123Format() const
Returns time value formatted as specified in RFC 1123.
Definition date_time.cc:39
static MultiThreadingMgr & instance()
Returns a single instance of Multi Threading Manager.
void removeCriticalSectionCallbacks(const std::string &name)
Removes the set of callbacks associated with a given name from the list of CriticalSection callbacks.
void addCriticalSectionCallbacks(const std::string &name, const CSCallbackSet::Callback &check_cb, const CSCallbackSet::Callback &entry_cb, const CSCallbackSet::Callback &exit_cb)
Adds a set of callbacks to the list of CriticalSection callbacks.
std::string getStateLabel(const int state) const
Fetches the label associated with a state value.
void unpauseModel()
Unpauses state model.
int getLastEvent() const
Fetches the model's last event.
bool isModelPaused() const
Returns whether or not the model is paused.
virtual void defineEvents()
Populates the set of events.
bool doOnExit()
Checks if on exit flag is true.
void defineEvent(int value, const std::string &label)
Adds an event value and associated label to the set of events.
virtual void verifyEvents()
Validates the contents of the set of events.
bool doOnEntry()
Checks if on entry flag is true.
static const int NOP_EVT
Signifies that no event has occurred.
int getCurrState() const
Fetches the model's current state.
void defineState(int value, const std::string &label, StateHandler handler, const StatePausing &state_pausing=STATE_PAUSE_NEVER)
Adds a state value and associated label to the set of states.
void startModel(const int start_state)
Begins execution of the model.
const EventPtr & getEvent(int value)
Fetches the event referred to by value.
virtual void defineStates()
Populates the set of states.
virtual void runModel(int event)
Processes events through the state model.
void transition(int state, int event)
Sets up the model to transition into given state with a given event.
int getNextEvent() const
Fetches the model's next event.
int getPrevState() const
Fetches the model's previous state.
const StatePtr getState(int value)
Fetches the state referred to by value.
void postNextEvent(int event)
Sets the next event to the given event value.
Utility class to measure code execution times.
Definition stopwatch.h:35
void stop()
Stops the stopwatch.
Definition stopwatch.cc:34
std::string logFormatLastDuration() const
Returns the last measured duration in the format directly usable in log messages.
Definition stopwatch.cc:74
This file contains several functions and constants that are used for handling commands and responses ...
if(!(yy_init))
Definition d2_lexer.cc:1515
#define isc_throw(type, stream)
A shortcut macro to insert known values into exception arguments.
An abstract API for lease database.
#define LOG_ERROR(LOGGER, MESSAGE)
Macro to conveniently test error output and log it.
Definition macros.h:32
#define LOG_INFO(LOGGER, MESSAGE)
Macro to conveniently test info output and log it.
Definition macros.h:20
#define LOG_WARN(LOGGER, MESSAGE)
Macro to conveniently test warn output and log it.
Definition macros.h:26
const int CONTROL_RESULT_EMPTY
Status code indicating that the specified command was completed correctly, but failed to produce any ...
const char * CONTROL_TEXT
String used for storing textual description ("text").
ConstElementPtr parseAnswer(int &rcode, const ConstElementPtr &msg)
Parses a standard config/command level answer and returns arguments or text status code.
constexpr long TIMEOUT_DEFAULT_HTTP_CLIENT_REQUEST
Timeout for the HTTP clients awaiting a response to a request.
Definition timeouts.h:34
const int CONTROL_RESULT_ERROR
Status code indicating a general failure.
ConstElementPtr createAnswer()
Creates a standard config/command level success answer message (i.e.
const int CONTROL_RESULT_CONFLICT
Status code indicating that the command was unsuccessful due to a conflict between the command argume...
const int CONTROL_RESULT_COMMAND_UNSUPPORTED
Status code indicating that the specified command is not supported.
const char * CONTROL_RESULT
String used for result, i.e. integer status ("result").
const int CONTROL_RESULT_SUCCESS
Status code indicating a successful operation.
boost::shared_ptr< const Element > ConstElementPtr
Definition data.h:30
boost::shared_ptr< Element > ElementPtr
Definition data.h:29
boost::shared_ptr< isc::dhcp::Pkt > PktPtr
A pointer to either Pkt4 or Pkt6 packet.
Definition pkt.h:999
std::string ClientClass
Defines a single class name.
Definition classify.h:44
boost::shared_ptr< Lease4Collection > Lease4CollectionPtr
A shared pointer to the collection of IPv4 leases.
Definition lease.h:523
boost::shared_ptr< Pkt4 > Pkt4Ptr
A pointer to Pkt4 object.
Definition pkt4.h:556
boost::shared_ptr< Lease6 > Lease6Ptr
Pointer to a Lease6 structure.
Definition lease.h:528
boost::shared_ptr< Lease > LeasePtr
Pointer to the lease object.
Definition lease.h:25
boost::shared_ptr< NetworkState > NetworkStatePtr
Pointer to the NetworkState object.
boost::shared_ptr< Lease6Collection > Lease6CollectionPtr
A shared pointer to the collection of IPv6 leases.
Definition lease.h:696
boost::shared_ptr< Pkt6 > Pkt6Ptr
A pointer to Pkt6 packet.
Definition pkt6.h:31
std::vector< Lease4Ptr > Lease4Collection
A collection of IPv4 leases.
Definition lease.h:520
boost::shared_ptr< Lease4 > Lease4Ptr
Pointer to a Lease4 structure.
Definition lease.h:315
const isc::log::MessageID HA_INVALID_PARTNER_STATE_LOAD_BALANCING
Definition ha_messages.h:52
const isc::log::MessageID HA_RESUME_CLIENT_LISTENER_FAILED
const isc::log::MessageID HA_LOCAL_DHCP_ENABLE
Definition ha_messages.h:91
const isc::log::MessageID HA_LEASES_BACKLOG_NOTHING_TO_SEND
Definition ha_messages.h:68
const isc::log::MessageID HA_LEASES_BACKLOG_FAILED
Definition ha_messages.h:67
const isc::log::MessageID HA_SYNC_FAILED
const isc::log::MessageID HA_TERMINATED_RESTART_PARTNER
const int HA_PASSIVE_BACKUP_ST
In passive-backup state with a single active server and backup servers.
const int HA_HOT_STANDBY_ST
Hot standby state.
const isc::log::MessageID HA_INVALID_PARTNER_STATE_COMMUNICATION_RECOVERY
Definition ha_messages.h:50
const isc::log::MessageID HA_LEASES_BACKLOG_SUCCESS
Definition ha_messages.h:70
const int HA_COMMUNICATION_RECOVERY_ST
Communication recovery state.
const isc::log::MessageID HA_STATE_MACHINE_CONTINUED
isc::log::Logger ha_logger("ha-hooks")
Definition ha_log.h:17
const isc::log::MessageID HA_LEASES_SYNC_FAILED
Definition ha_messages.h:73
const isc::log::MessageID HA_SYNC_SUCCESSFUL
const int HA_UNAVAILABLE_ST
Special state indicating that this server is unable to communicate with the partner.
const isc::log::MessageID HA_CONFIG_LEASE_UPDATES_DISABLED_REMINDER
Definition ha_messages.h:34
const isc::log::MessageID HA_SERVICE_STARTED
const int HA_TERMINATED_ST
HA service terminated state.
const int HA_IN_MAINTENANCE_ST
In maintenance state.
const int HA_LOAD_BALANCING_ST
Load balancing state.
const isc::log::MessageID HA_DHCP_ENABLE_FAILED
Definition ha_messages.h:43
const isc::log::MessageID HA_LEASE_UPDATE_DELETE_FAILED_ON_PEER
Definition ha_messages.h:83
const isc::log::MessageID HA_LEASES_BACKLOG_START
Definition ha_messages.h:69
const isc::log::MessageID HA_SYNC_START
const isc::log::MessageID HA_HEARTBEAT_FAILED
Definition ha_messages.h:45
const int HA_PARTNER_DOWN_ST
Partner down state.
const isc::log::MessageID HA_LEASE_UPDATES_ENABLED
Definition ha_messages.h:79
const isc::log::MessageID HA_INVALID_PARTNER_STATE_HOT_STANDBY
Definition ha_messages.h:51
const isc::log::MessageID HA_STATE_MACHINE_PAUSED
const isc::log::MessageID HA_TERMINATED
const isc::log::MessageID HA_DHCP_DISABLE_FAILED
Definition ha_messages.h:41
boost::shared_ptr< HAConfig > HAConfigPtr
Pointer to the High Availability configuration structure.
Definition ha_config.h:37
const isc::log::MessageID HA_MAINTENANCE_STARTED_IN_PARTNER_DOWN
const int HA_PARTNER_IN_MAINTENANCE_ST
Partner in-maintenance state.
const isc::log::MessageID HA_MAINTENANCE_NOTIFY_FAILED
Definition ha_messages.h:96
const int HA_WAITING_ST
Server waiting state, i.e. waiting for another server to be ready.
HAServerType
Lists possible server types for which HA service is created.
const int HA_BACKUP_ST
Backup state.
const isc::log::MessageID HA_PAUSE_CLIENT_LISTENER_ILLEGAL
const isc::log::MessageID HA_PAUSE_CLIENT_LISTENER_FAILED
const isc::log::MessageID HA_MAINTENANCE_SHUTDOWN_SAFE
Definition ha_messages.h:98
const isc::log::MessageID HA_MAINTENANCE_NOTIFY_CANCEL_FAILED
Definition ha_messages.h:94
const isc::log::MessageID HA_LEASE_UPDATE_CONFLICT
Definition ha_messages.h:81
const isc::log::MessageID HA_LEASE_UPDATES_DISABLED
Definition ha_messages.h:78
const isc::log::MessageID HA_LOCAL_DHCP_DISABLE
Definition ha_messages.h:90
const int HA_SYNCING_ST
Synchronizing database state.
const isc::log::MessageID HA_RESET_FAILED
const isc::log::MessageID HA_STATE_TRANSITION
const isc::log::MessageID HA_CONFIG_LEASE_SYNCING_DISABLED_REMINDER
Definition ha_messages.h:31
std::string stateToString(int state)
Returns state name.
const int HA_READY_ST
Server ready state, i.e. synchronized database, can enable DHCP service.
const isc::log::MessageID HA_TERMINATED_PARTNER_DID_NOT_RESTART
const isc::log::MessageID HA_SYNC_COMPLETE_NOTIFY_FAILED
const isc::log::MessageID HA_MAINTENANCE_STARTED
Definition ha_messages.h:99
const isc::log::MessageID HA_LEASE_UPDATE_CREATE_UPDATE_FAILED_ON_PEER
Definition ha_messages.h:82
const isc::log::MessageID HA_LEASE_UPDATE_FAILED
Definition ha_messages.h:84
const isc::log::MessageID HA_STATE_TRANSITION_PASSIVE_BACKUP
boost::shared_ptr< ParkingLotHandle > ParkingLotHandlePtr
Pointer to the parking lot handle.
boost::shared_ptr< PostHttpRequestJson > PostHttpRequestJsonPtr
Pointer to PostHttpRequestJson.
boost::shared_ptr< HttpResponseJson > HttpResponseJsonPtr
Pointer to the HttpResponseJson object.
boost::shared_ptr< HttpResponse > HttpResponsePtr
Pointer to the HttpResponse object.
Definition response.h:81
const char * MessageID
std::string ptimeToText(boost::posix_time::ptime t, size_t fsecs_precision=MAX_FSECS_PRECISION)
Converts ptime structure to text.
Defines the logger used by the top-level component of kea-lfc.
static constexpr uint32_t STATE_RELEASED
Released lease held in the database for lease affinity.
Definition lease.h:78
HTTP request/response timeout value.
static const HttpVersion & HTTP_11()
HTTP version 1.1.
Definition http_types.h:59