测试环境:3个计算节点,分别属于3个zone
[[email protected] ~(keystone_admin)]# nova availability-zone-list
+-----------------------+----------------------------------------+
| Name | Status |
+-----------------------+----------------------------------------+
| internal | available |
| |- controller2 | |
| | |- nova-conductor | enabled :-) 2016-08-20T14:57:07.000000 |
| | |- nova-scheduler | enabled :-) 2016-08-20T14:57:06.000000 |
| | |- nova-consoleauth | enabled :-) 2016-08-20T14:57:08.000000 |
| | |- nova-cert | enabled :-) 2016-08-20T14:57:07.000000 |
| nova | available |
| |- controller3 | |
| | |- nova-compute | enabled :-) 2016-08-20T14:57:04.000000 |
| ag1 | available |
| |- controller1 | |
| | |- nova-compute | enabled :-) 2016-08-19T23:41:45.000000 |
| ag2 | available |
| |- controller2 | |
| | |- nova-compute | enabled :-) 2016-08-20T14:57:06.000000 |
+-----------------------+----------------------------------------+
测试方式:启动虚拟机时选择 "Boot from image (creates a new volume)" 进行测试
产生的原因: cinder无法识别nova的多zone,cinder能获取的zone信息有:
1、cinder-volume所在的zone
2、cinder.conf配置文件中的两个参数storage_availability_zone = nova和default_availability_zone = nova
大致过一下代码调用过程(从nova调用cinder的部分开始):
1、nova/virt/block_device.py -> class DriverImageBlockDevice def attach vol = volume_api.create
2、nova/volume/cinder.py -> class API def create item = client.volumes.create
3、cinder/api/v2/volumes.py -> class VolumeController def create new_volume = self.volume_api.create
4、cinder/volume/api.py -> class API def create flow_engine = create_volume.get_flow
在create函数中,cinder收集它能够获取到的zone信息:
# Build the set of availability zones cinder knows about: the names
# returned by list_availability_zones(), plus the configured
# storage_availability_zone when it is set.
raw_zones = self.list_availability_zones(enable_cache=True)
availability_zones = set([az['name'] for az in raw_zones])
if CONF.storage_availability_zone:
    availability_zones.add(CONF.storage_availability_zone)
def list_availability_zones services = objects.ServiceList.get_all_by_topic ->
cinder/objects/service.py(def get_all_by_topic) db.service_get_all_by_topic ->
cinder/db/api.py(def service_get_all_by_topic) IMPL.service_get_all_by_topic ->
cinder/db/sqlalchemy/api.py(def service_get_all_by_topic)
@require_admin_context
def service_get_all_by_topic(context, topic, disabled=None):
    """Return every non-deleted Service row registered under ``topic``.

    :param context: request context handed to ``model_query``
    :param topic: service topic to filter on (the walkthrough notes that
                  the caller passes cinder-volume by default)
    :param disabled: when not None, additionally filter rows on their
                     ``disabled`` column
    :returns: the result of ``query.all()``
    """
    # ``models`` here is cinder/db/sqlalchemy/models.py.
    # The filter chain is kept inside one expression so no backslash
    # continuation (which cannot be followed by a comment) is needed.
    query = model_query(
        context, models.Service, read_deleted="no").filter_by(topic=topic)
    if disabled is not None:
        query = query.filter_by(disabled=disabled)
    return query.all()
查询数据库找出cinder-volume所在的zone
5、cinder/volume/flows/api/create_volume.py -> def get_flow
# This is a taskflow flow; only the tasks handed to add() matter here.
def get_flow(db_api, image_service_api, availability_zones, create_what,
             scheduler_rpcapi=None, volume_rpcapi=None):
    """Constructs and returns the api entrypoint flow.

    This flow will do the following:

    1. Inject keys & values for dependent tasks.
    2. Extracts and validates the input keys & values.
    3. Reserves the quota (reverts quota on any failures).
    4. Creates the database entry.
    5. Commits the quota.
    6. Casts to volume manager or scheduler for further processing.

    :param db_api: passed to EntryCreateTask and VolumeCastTask
    :param image_service_api: passed to ExtractVolumeRequestTask
    :param availability_zones: zones passed to ExtractVolumeRequestTask
    :param create_what: initial data stored in the loaded flow
    :param scheduler_rpcapi: optional; the cast task is only added when
                             both rpc apis are provided
    :param volume_rpcapi: optional; see ``scheduler_rpcapi``
    :returns: the engine produced by ``taskflow.engines.load``
    """

    flow_name = ACTION.replace(":", "_") + "_api"
    api_flow = linear_flow.Flow(flow_name)

    api_flow.add(ExtractVolumeRequestTask(
        image_service_api,
        availability_zones,
        rebind={'size': 'raw_size',
                'availability_zone': 'raw_availability_zone',
                'volume_type': 'raw_volume_type'}))
    api_flow.add(QuotaReserveTask(),
                 EntryCreateTask(db_api),
                 QuotaCommitTask())

    if scheduler_rpcapi and volume_rpcapi:
        # This will cast it out to either the scheduler or volume manager via
        # the rpc apis provided.
        api_flow.add(VolumeCastTask(scheduler_rpcapi, volume_rpcapi, db_api))

    # Now load (but do not run) the flow using the provided initial data.
    return taskflow.engines.load(api_flow, store=create_what)
我们关心的在class ExtractVolumeRequestTask里
先看入口execute方法。为什么要看execute?因为taskflow中每个task的执行入口就是它的execute方法,翻一下taskflow的用法就明白了。
def execute(self, context, size, snapshot, image_id, source_volume,
            availability_zone, volume_type, metadata, key_manager,
            source_replica, consistencygroup, cgsnapshot):
    """Validate and extract the inputs of a create-volume request.

    Checks that snapshot / image / source volume are mutually exclusive,
    enforces the policy for ACTION, then runs each ``_extract_*`` helper
    and the image metadata check before resolving the availability zone.
    """
    utils.check_exclusive_options(snapshot=snapshot,
                                  imageRef=image_id,
                                  source_volume=source_volume)
    policy.enforce_action(context, ACTION)

    # TODO(harlowja): what guarantee is there that the snapshot or source
    # volume will remain available after we do this initial verification??
    snapshot_id = self._extract_snapshot(snapshot)
    source_volid = self._extract_source_volume(source_volume)
    source_replicaid = self._extract_source_replica(source_replica)
    size = self._extract_size(size, source_volume, snapshot)
    consistencygroup_id = self._extract_consistencygroup(consistencygroup)
    cgsnapshot_id = self._extract_cgsnapshot(cgsnapshot)

    self._check_image_metadata(context, image_id, size)

    # This is the call this walkthrough cares about.
    availability_zone = self._extract_availability_zone(availability_zone,
                                                        snapshot,
                                                        source_volume)
    # NOTE(review): the quoted excerpt stops here; the rest of the method
    # body is presumably omitted -- confirm against the cinder source.


# The _extract_availability_zone function
def _extract_availability_zone(self, availability_zone, snapshot,
                               source_volume):
    """Extracts and returns a validated availability zone.

    This function will extract the availability zone (if not provided) from
    the snapshot or source_volume and then performs a set of validation
    checks on the provided or extracted availability zone and then returns
    the validated availability zone.

    :raises exception.InvalidInput: when the zone is not in
        ``self.availability_zones`` and
        ``CONF.allow_availability_zone_fallback`` is falsy
    """

    # Try to extract the availability zone from the corresponding snapshot
    # or source volume if either is valid so that we can be in the same
    # availability zone as the source.
    if availability_zone is None:
        if snapshot:
            try:
                availability_zone = snapshot['volume']['availability_zone']
            except (TypeError, KeyError):
                pass
        if source_volume and availability_zone is None:
            try:
                availability_zone = source_volume['availability_zone']
            except (TypeError, KeyError):
                pass

    if availability_zone is None:
        # default_availability_zone takes precedence when it is set.
        if CONF.default_availability_zone:
            availability_zone = CONF.default_availability_zone
        else:
            # For backwards compatibility use the storage_availability_zone
            availability_zone = CONF.storage_availability_zone

    # Per the walkthrough, self.availability_zones holds the zones of the
    # cinder-volume services plus the zone settings from cinder.conf.
    if availability_zone not in self.availability_zones:
        # allow_availability_zone_fallback is the key switch: it decides
        # whether an unknown zone is replaced or rejected.
        if CONF.allow_availability_zone_fallback:
            original_az = availability_zone
            availability_zone = (
                CONF.default_availability_zone or
                CONF.storage_availability_zone)
            LOG.warning(_LW("Availability zone '%(s_az)s' "
                            "not found, falling back to "
                            "'%(s_fallback_az)s'."),
                        {'s_az': original_az,
                         's_fallback_az': availability_zone})
        else:
            # Without allow_availability_zone_fallback enabled, the
            # request is rejected here.
            msg = _("Availability zone '%(s_az)s' is invalid.")
            msg = msg % {'s_az': availability_zone}
            raise exception.InvalidInput(reason=msg)
    # NOTE(review): the quoted excerpt ends here without a return even
    # though the docstring promises one; the tail of the method is
    # presumably omitted -- confirm against the cinder source.