# Source code for ironic.common.state_machine

# Copyright (c) 2012 NTT DOCOMO, INC.
# Copyright 2010 OpenStack Foundation
# All Rights Reserved.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.

"""
Mapping of bare metal node states.

Setting the node `power_state` is handled by the conductor's power
synchronization thread. Based on the power state retrieved from the driver
for the node, the state is set to POWER_ON or POWER_OFF, accordingly.
Should this fail, the `power_state` value is left unchanged, and the node
is placed into maintenance mode.

The `power_state` can also be set manually via the API. A failure to change
the state leaves the current state unchanged. The node is NOT placed into
maintenance mode in this case.
"""

from oslo_log import log as logging

from ironic.common import fsm
from ironic.common import states as st

LOG = logging.getLogger(__name__)

#####################
# State machine model
#####################


def on_exit(old_state, event):
    """Emit a debug log record when a state is being left.

    :param old_state: value interpolated into the message as the state
                      being exited.
    :param event: value interpolated into the message as the event that
                  caused the exit.
    """
    message = "Exiting old state '%s' in response to event '%s'"
    # Arguments are passed separately so the logger formats them lazily.
    LOG.debug(message, old_state, event)
def on_enter(new_state, event):
    """Emit a debug log record when a state is being entered.

    :param new_state: value interpolated into the message as the state
                      being entered.
    :param event: value interpolated into the message as the event that
                  caused the entry.
    """
    message = "Entering new state '%s' in response to event '%s'"
    # Arguments are passed separately so the logger formats them lazily.
    LOG.debug(message, new_state, event)
# Callbacks attached to every state registered below, passed to
# add_state() as keyword arguments.
watchers = {'on_exit': on_exit, 'on_enter': on_enter}

machine = fsm.FSM()

# NOTE: every state is registered before any transition that refers to it,
# and the registration order below is kept deliberately.

# --- State registration (part 1) -------------------------------------------

# Stable states.
for state in st.STABLE_STATES:
    machine.add_state(state, stable=True, **watchers)

# Verifying state; targets MANAGEABLE.
machine.add_state(st.VERIFYING, target=st.MANAGEABLE, **watchers)

# deploy* states; all target ACTIVE.
# NOTE(tenbrae): Juno shows a target_provision_state of DEPLOYDONE
#                this is changed in Kilo to ACTIVE
machine.add_state(st.DEPLOYING, target=st.ACTIVE, **watchers)
machine.add_state(st.DEPLOYWAIT, target=st.ACTIVE, **watchers)
machine.add_state(st.DEPLOYFAIL, target=st.ACTIVE, **watchers)
machine.add_state(st.DEPLOYHOLD, target=st.ACTIVE, **watchers)

# clean* states; all target AVAILABLE.
machine.add_state(st.CLEANING, target=st.AVAILABLE, **watchers)
machine.add_state(st.CLEANWAIT, target=st.AVAILABLE, **watchers)
machine.add_state(st.CLEANFAIL, target=st.AVAILABLE, **watchers)
machine.add_state(st.CLEANHOLD, target=st.AVAILABLE, **watchers)

# delete* states; target AVAILABLE.
machine.add_state(st.DELETING, target=st.AVAILABLE, **watchers)

# A deployment may be started from AVAILABLE.
machine.add_transition(st.AVAILABLE, st.DEPLOYING, 'deploy')

# inspect* states; all target MANAGEABLE.
machine.add_state(st.INSPECTING, target=st.MANAGEABLE, **watchers)
machine.add_state(st.INSPECTFAIL, target=st.MANAGEABLE, **watchers)
machine.add_state(st.INSPECTWAIT, target=st.MANAGEABLE, **watchers)

# adopt* states; all target ACTIVE.
machine.add_state(st.ADOPTING, target=st.ACTIVE, **watchers)
machine.add_state(st.ADOPTFAIL, target=st.ACTIVE, **watchers)

# Rescue states target RESCUE; unrescue states target ACTIVE.
machine.add_state(st.RESCUING, target=st.RESCUE, **watchers)
machine.add_state(st.RESCUEWAIT, target=st.RESCUE, **watchers)
machine.add_state(st.RESCUEFAIL, target=st.RESCUE, **watchers)
machine.add_state(st.UNRESCUING, target=st.ACTIVE, **watchers)
machine.add_state(st.UNRESCUEFAIL, target=st.ACTIVE, **watchers)

# --- Deployment transitions -------------------------------------------------

# A deployment may fail.
machine.add_transition(st.DEPLOYING, st.DEPLOYFAIL, 'fail')

# A failed deployment may be retried.
# ironic/conductor/manager.py:do_node_deploy()
machine.add_transition(st.DEPLOYFAIL, st.DEPLOYING, 'rebuild')
# NOTE(tenbrae): Juno allows a client to send "active" to initiate a rebuild
machine.add_transition(st.DEPLOYFAIL, st.DEPLOYING, 'deploy')

# A deployment may wait on external callbacks, be held, or resume.
machine.add_transition(st.DEPLOYING, st.DEPLOYWAIT, 'wait')
machine.add_transition(st.DEPLOYING, st.DEPLOYHOLD, 'hold')
machine.add_transition(st.DEPLOYWAIT, st.DEPLOYHOLD, 'hold')
machine.add_transition(st.DEPLOYWAIT, st.DEPLOYING, 'resume')

# A deployment waiting on a callback may time out.
machine.add_transition(st.DEPLOYWAIT, st.DEPLOYFAIL, 'fail')

# Releasing a deploy hold returns the node to the deploy wait state.
machine.add_transition(st.DEPLOYHOLD, st.DEPLOYWAIT, 'unhold')

# A node in deploy hold may also be aborted.
machine.add_transition(st.DEPLOYHOLD, st.DEPLOYFAIL, 'abort')

# A deployment may complete.
machine.add_transition(st.DEPLOYING, st.ACTIVE, 'done')

# An active instance may be re-deployed.
# ironic/conductor/manager.py:do_node_deploy()
machine.add_transition(st.ACTIVE, st.DEPLOYING, 'rebuild')

# --- Deletion transitions ---------------------------------------------------

# An active instance may be deleted.
# ironic/conductor/manager.py:do_node_tear_down()
machine.add_transition(st.ACTIVE, st.DELETING, 'delete')

# A deployment that is waiting may be deleted.
# ironic/conductor/manager.py:do_node_tear_down()
machine.add_transition(st.DEPLOYWAIT, st.DELETING, 'delete')

# A failed deployment may also be deleted.
# ironic/conductor/manager.py:do_node_tear_down()
machine.add_transition(st.DEPLOYFAIL, st.DELETING, 'delete')

# Deleting can also transition to error.
machine.add_transition(st.DELETING, st.ERROR, 'fail')

# When finished deleting, a node begins cleaning.
machine.add_transition(st.DELETING, st.CLEANING, 'clean')

# --- Cleaning transitions ---------------------------------------------------

# If cleaning succeeds, the node becomes available for scheduling.
machine.add_transition(st.CLEANING, st.AVAILABLE, 'done')

# If cleaning fails, wait for operator intervention.
machine.add_transition(st.CLEANING, st.CLEANFAIL, 'fail')
machine.add_transition(st.CLEANWAIT, st.CLEANFAIL, 'fail')

# While waiting for a clean step to finish, cleaning may be aborted.
machine.add_transition(st.CLEANWAIT, st.CLEANFAIL, 'abort')

# Cleaning may wait on external callbacks, be held, or resume.
machine.add_transition(st.CLEANING, st.CLEANWAIT, 'wait')
machine.add_transition(st.CLEANING, st.CLEANHOLD, 'hold')
machine.add_transition(st.CLEANWAIT, st.CLEANHOLD, 'hold')
machine.add_transition(st.CLEANWAIT, st.CLEANING, 'resume')

# A node in a clean hold step may also be aborted.
machine.add_transition(st.CLEANHOLD, st.CLEANFAIL, 'abort')

# Releasing a clean hold returns the node to the clean wait state.
machine.add_transition(st.CLEANHOLD, st.CLEANWAIT, 'unhold')

# An operator may move a CLEANFAIL node to MANAGEABLE to perform other
# actions, like cleaning.
machine.add_transition(st.CLEANFAIL, st.MANAGEABLE, 'manage')

# From MANAGEABLE, a node may move to available after going through
# automated cleaning.
machine.add_transition(st.MANAGEABLE, st.CLEANING, 'provide')

# From MANAGEABLE, a node may be manually cleaned, going back to manageable
# after cleaning is completed.
machine.add_transition(st.MANAGEABLE, st.CLEANING, 'clean')
machine.add_transition(st.CLEANING, st.MANAGEABLE, 'manage')

# From AVAILABLE, a node may be made unavailable by managing it.
machine.add_transition(st.AVAILABLE, st.MANAGEABLE, 'manage')

# --- Error recovery ---------------------------------------------------------

# An errored instance can be rebuilt ...
# ironic/conductor/manager.py:do_node_deploy()
machine.add_transition(st.ERROR, st.DEPLOYING, 'rebuild')

# ... or deleted.
# ironic/conductor/manager.py:do_node_tear_down()
machine.add_transition(st.ERROR, st.DELETING, 'delete')

# --- Inspection transitions -------------------------------------------------

# Initiate inspection.
machine.add_transition(st.MANAGEABLE, st.INSPECTING, 'inspect')

# Inspection completes.
# ironic/conductor/manager.py:inspect_hardware().
machine.add_transition(st.INSPECTING, st.MANAGEABLE, 'done')

# Inspection may fail.
machine.add_transition(st.INSPECTING, st.INSPECTFAIL, 'fail')

# Asynchronous inspection waits.
machine.add_transition(st.INSPECTING, st.INSPECTWAIT, 'wait')

# Asynchronous inspection is done.
machine.add_transition(st.INSPECTWAIT, st.MANAGEABLE, 'done')

# Asynchronous inspection failed.
machine.add_transition(st.INSPECTWAIT, st.INSPECTFAIL, 'fail')

# Asynchronous inspection is aborted.
machine.add_transition(st.INSPECTWAIT, st.INSPECTFAIL, 'abort')

# Asynchronous inspection is continued.
machine.add_transition(st.INSPECTWAIT, st.INSPECTING, 'resume')

# Move the node to the manageable state for any other action.
machine.add_transition(st.INSPECTFAIL, st.MANAGEABLE, 'manage')

# Re-initiate inspection after a failed inspection.
machine.add_transition(st.INSPECTFAIL, st.INSPECTING, 'inspect')

# --- Rescue / unrescue transitions ------------------------------------------

# A provisioned node may have a rescue initiated.
machine.add_transition(st.ACTIVE, st.RESCUING, 'rescue')

# A rescue may succeed.
machine.add_transition(st.RESCUING, st.RESCUE, 'done')

# A rescue may wait on external callbacks and later resume.
machine.add_transition(st.RESCUING, st.RESCUEWAIT, 'wait')
machine.add_transition(st.RESCUEWAIT, st.RESCUING, 'resume')

# A rescued node may be re-rescued.
machine.add_transition(st.RESCUE, st.RESCUING, 'rescue')

# A rescued node may be deleted.
machine.add_transition(st.RESCUE, st.DELETING, 'delete')

# A rescue may fail.
machine.add_transition(st.RESCUEWAIT, st.RESCUEFAIL, 'fail')
machine.add_transition(st.RESCUING, st.RESCUEFAIL, 'fail')

# While waiting for a rescue step to finish, rescuing may be aborted.
machine.add_transition(st.RESCUEWAIT, st.RESCUEFAIL, 'abort')

# A failed rescue may be re-rescued.
machine.add_transition(st.RESCUEFAIL, st.RESCUING, 'rescue')

# A failed rescue may be unrescued.
machine.add_transition(st.RESCUEFAIL, st.UNRESCUING, 'unrescue')

# A failed rescue may be deleted.
machine.add_transition(st.RESCUEFAIL, st.DELETING, 'delete')

# A node in rescue wait may be deleted.
machine.add_transition(st.RESCUEWAIT, st.DELETING, 'delete')

# A rescued node may be unrescued.
machine.add_transition(st.RESCUE, st.UNRESCUING, 'unrescue')

# An unrescue may succeed.
machine.add_transition(st.UNRESCUING, st.ACTIVE, 'done')

# An unrescue may fail.
machine.add_transition(st.UNRESCUING, st.UNRESCUEFAIL, 'fail')

# A failed unrescue may be re-rescued.
machine.add_transition(st.UNRESCUEFAIL, st.RESCUING, 'rescue')

# A failed unrescue may be unrescued again.
machine.add_transition(st.UNRESCUEFAIL, st.UNRESCUING, 'unrescue')

# A failed unrescue may be deleted.
machine.add_transition(st.UNRESCUEFAIL, st.DELETING, 'delete')

# --- Enrollment / verification transitions ----------------------------------

# Start power credentials verification.
machine.add_transition(st.ENROLL, st.VERIFYING, 'manage')

# Verification can succeed.
machine.add_transition(st.VERIFYING, st.MANAGEABLE, 'done')

# Verification can fail with setting last_error and rolling back to ENROLL.
machine.add_transition(st.VERIFYING, st.ENROLL, 'fail')

# --- Adoption transitions ---------------------------------------------------

# Node adoption is being attempted.
machine.add_transition(st.MANAGEABLE, st.ADOPTING, 'adopt')

# Adoption can succeed, and the node should be set to ACTIVE.
machine.add_transition(st.ADOPTING, st.ACTIVE, 'done')

# Adoption can fail; such nodes are put into a dedicated state that holds
# them.
machine.add_transition(st.ADOPTING, st.ADOPTFAIL, 'fail')

# Adoption can be retried when it previously failed.
machine.add_transition(st.ADOPTFAIL, st.ADOPTING, 'adopt')

# A node that failed adoption can be moved back to manageable.
machine.add_transition(st.ADOPTFAIL, st.MANAGEABLE, 'manage')

# --- State registration (part 2): service* states ---------------------------

# service* states; all target ACTIVE.
machine.add_state(st.SERVICING, target=st.ACTIVE, **watchers)
machine.add_state(st.SERVICEWAIT, target=st.ACTIVE, **watchers)
machine.add_state(st.SERVICEFAIL, target=st.ACTIVE, **watchers)
machine.add_state(st.SERVICEHOLD, target=st.ACTIVE, **watchers)

# --- Servicing transitions --------------------------------------------------

# A node in service can be returned to active.
machine.add_transition(st.SERVICING, st.ACTIVE, 'done')

# An active node can be serviced.
machine.add_transition(st.ACTIVE, st.SERVICING, 'service')

# A node in servicing can fail.
machine.add_transition(st.SERVICING, st.SERVICEFAIL, 'fail')

# A node in service can enter a wait state.
machine.add_transition(st.SERVICING, st.SERVICEWAIT, 'wait')

# A node in service, or waiting in service, can be held.
machine.add_transition(st.SERVICING, st.SERVICEHOLD, 'hold')
machine.add_transition(st.SERVICEWAIT, st.SERVICEHOLD, 'hold')

# A held node in service can get more service steps to start over.
machine.add_transition(st.SERVICEHOLD, st.SERVICING, 'service')

# Releasing a service hold moves the node to the service wait state.
machine.add_transition(st.SERVICEHOLD, st.SERVICEWAIT, 'unhold')

# A node in service wait can resume.
machine.add_transition(st.SERVICEWAIT, st.SERVICING, 'resume')

# A node in service wait can fail.
machine.add_transition(st.SERVICEWAIT, st.SERVICEFAIL, 'fail')

# A node in service hold can fail.
machine.add_transition(st.SERVICEHOLD, st.SERVICEFAIL, 'fail')

# A node in service wait can be aborted.
machine.add_transition(st.SERVICEWAIT, st.SERVICEFAIL, 'abort')

# A node in service hold can be aborted.
machine.add_transition(st.SERVICEHOLD, st.SERVICEFAIL, 'abort')

# A node in service fail can re-enter service.
machine.add_transition(st.SERVICEFAIL, st.SERVICING, 'service')

# A node in service fail can be rescued.
machine.add_transition(st.SERVICEFAIL, st.RESCUING, 'rescue')

# A node in service fail can enter the wait state.
machine.add_transition(st.SERVICEFAIL, st.SERVICEWAIT, 'wait')

# A node in service fail can be held.
machine.add_transition(st.SERVICEFAIL, st.SERVICEHOLD, 'hold')

# A node in service fail may be deleted.
machine.add_transition(st.SERVICEFAIL, st.DELETING, 'delete')

# A node in service fail may be aborted (returned to active).
machine.add_transition(st.SERVICEFAIL, st.ACTIVE, 'abort')

# A node in service wait may be deleted.
machine.add_transition(st.SERVICEWAIT, st.DELETING, 'delete')