diff options
113 files changed, 5040 insertions, 1122 deletions
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 550850b264..996b3314f4 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -208,7 +208,7 @@ static int l2_load(BlockDriverState *bs, uint64_t offset, uint64_t l2_offset, uint64_t **l2_slice) { BDRVQcow2State *s = bs->opaque; - int start_of_slice = sizeof(uint64_t) * + int start_of_slice = l2_entry_size(s) * (offset_to_l2_index(s, offset) - offset_to_l2_slice_index(s, offset)); return qcow2_cache_get(bs, s->l2_table_cache, l2_offset + start_of_slice, @@ -281,7 +281,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index) /* allocate a new l2 entry */ - l2_offset = qcow2_alloc_clusters(bs, s->l2_size * sizeof(uint64_t)); + l2_offset = qcow2_alloc_clusters(bs, s->l2_size * l2_entry_size(s)); if (l2_offset < 0) { ret = l2_offset; goto fail; @@ -305,7 +305,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index) /* allocate a new entry in the l2 cache */ - slice_size2 = s->l2_slice_size * sizeof(uint64_t); + slice_size2 = s->l2_slice_size * l2_entry_size(s); n_slices = s->cluster_size / slice_size2; trace_qcow2_l2_allocate_get_empty(bs, l1_index); @@ -369,70 +369,123 @@ fail: } s->l1_table[l1_index] = old_l2_offset; if (l2_offset > 0) { - qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t), + qcow2_free_clusters(bs, l2_offset, s->l2_size * l2_entry_size(s), QCOW2_DISCARD_ALWAYS); } return ret; } /* - * Checks how many clusters in a given L2 slice are contiguous in the image - * file. As soon as one of the flags in the bitmask stop_flags changes compared - * to the first cluster, the search is stopped and the cluster is not counted - * as contiguous. (This allows it, for example, to stop at the first compressed - * cluster which may require a different handling) + * For a given L2 entry, count the number of contiguous subclusters of + * the same type starting from @sc_from. 
Compressed clusters are + * treated as if they were divided into subclusters of size + * s->subcluster_size. + * + * Return the number of contiguous subclusters and set @type to the + * subcluster type. + * + * If the L2 entry is invalid return -errno and set @type to + * QCOW2_SUBCLUSTER_INVALID. */ -static int count_contiguous_clusters(BlockDriverState *bs, int nb_clusters, - int cluster_size, uint64_t *l2_slice, uint64_t stop_flags) +static int qcow2_get_subcluster_range_type(BlockDriverState *bs, + uint64_t l2_entry, + uint64_t l2_bitmap, + unsigned sc_from, + QCow2SubclusterType *type) { - int i; - QCow2ClusterType first_cluster_type; - uint64_t mask = stop_flags | L2E_OFFSET_MASK | QCOW_OFLAG_COMPRESSED; - uint64_t first_entry = be64_to_cpu(l2_slice[0]); - uint64_t offset = first_entry & mask; + BDRVQcow2State *s = bs->opaque; + uint32_t val; - first_cluster_type = qcow2_get_cluster_type(bs, first_entry); - if (first_cluster_type == QCOW2_CLUSTER_UNALLOCATED) { - return 0; + *type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, sc_from); + + if (*type == QCOW2_SUBCLUSTER_INVALID) { + return -EINVAL; + } else if (!has_subclusters(s) || *type == QCOW2_SUBCLUSTER_COMPRESSED) { + return s->subclusters_per_cluster - sc_from; } - /* must be allocated */ - assert(first_cluster_type == QCOW2_CLUSTER_NORMAL || - first_cluster_type == QCOW2_CLUSTER_ZERO_ALLOC); + switch (*type) { + case QCOW2_SUBCLUSTER_NORMAL: + val = l2_bitmap | QCOW_OFLAG_SUB_ALLOC_RANGE(0, sc_from); + return cto32(val) - sc_from; - for (i = 0; i < nb_clusters; i++) { - uint64_t l2_entry = be64_to_cpu(l2_slice[i]) & mask; - if (offset + (uint64_t) i * cluster_size != l2_entry) { - break; - } - } + case QCOW2_SUBCLUSTER_ZERO_PLAIN: + case QCOW2_SUBCLUSTER_ZERO_ALLOC: + val = (l2_bitmap | QCOW_OFLAG_SUB_ZERO_RANGE(0, sc_from)) >> 32; + return cto32(val) - sc_from; - return i; + case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN: + case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC: + val = ((l2_bitmap >> 32) | 
l2_bitmap) + & ~QCOW_OFLAG_SUB_ALLOC_RANGE(0, sc_from); + return ctz32(val) - sc_from; + + default: + g_assert_not_reached(); + } } /* - * Checks how many consecutive unallocated clusters in a given L2 - * slice have the same cluster type. + * Return the number of contiguous subclusters of the exact same type + * in a given L2 slice, starting from cluster @l2_index, subcluster + * @sc_index. Allocated subclusters are required to be contiguous in + * the image file. + * At most @nb_clusters are checked (note that this means clusters, + * not subclusters). + * Compressed clusters are always processed one by one but for the + * purpose of this count they are treated as if they were divided into + * subclusters of size s->subcluster_size. + * On failure return -errno and update @l2_index to point to the + * invalid entry. */ -static int count_contiguous_clusters_unallocated(BlockDriverState *bs, - int nb_clusters, - uint64_t *l2_slice, - QCow2ClusterType wanted_type) +static int count_contiguous_subclusters(BlockDriverState *bs, int nb_clusters, + unsigned sc_index, uint64_t *l2_slice, + unsigned *l2_index) { - int i; + BDRVQcow2State *s = bs->opaque; + int i, count = 0; + bool check_offset = false; + uint64_t expected_offset = 0; + QCow2SubclusterType expected_type = QCOW2_SUBCLUSTER_NORMAL, type; - assert(wanted_type == QCOW2_CLUSTER_ZERO_PLAIN || - wanted_type == QCOW2_CLUSTER_UNALLOCATED); - for (i = 0; i < nb_clusters; i++) { - uint64_t entry = be64_to_cpu(l2_slice[i]); - QCow2ClusterType type = qcow2_get_cluster_type(bs, entry); + assert(*l2_index + nb_clusters <= s->l2_slice_size); - if (type != wanted_type) { + for (i = 0; i < nb_clusters; i++) { + unsigned first_sc = (i == 0) ? 
sc_index : 0; + uint64_t l2_entry = get_l2_entry(s, l2_slice, *l2_index + i); + uint64_t l2_bitmap = get_l2_bitmap(s, l2_slice, *l2_index + i); + int ret = qcow2_get_subcluster_range_type(bs, l2_entry, l2_bitmap, + first_sc, &type); + if (ret < 0) { + *l2_index += i; /* Point to the invalid entry */ + return -EIO; + } + if (i == 0) { + if (type == QCOW2_SUBCLUSTER_COMPRESSED) { + /* Compressed clusters are always processed one by one */ + return ret; + } + expected_type = type; + expected_offset = l2_entry & L2E_OFFSET_MASK; + check_offset = (type == QCOW2_SUBCLUSTER_NORMAL || + type == QCOW2_SUBCLUSTER_ZERO_ALLOC || + type == QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC); + } else if (type != expected_type) { + break; + } else if (check_offset) { + expected_offset += s->cluster_size; + if (expected_offset != (l2_entry & L2E_OFFSET_MASK)) { + break; + } + } + count += ret; + /* Stop if there are type changes before the end of the cluster */ + if (first_sc + ret < s->subclusters_per_cluster) { break; } } - return i; + return count; } static int coroutine_fn do_perform_cow_read(BlockDriverState *bs, @@ -496,31 +549,37 @@ static int coroutine_fn do_perform_cow_write(BlockDriverState *bs, /* - * get_cluster_offset + * get_host_offset * - * For a given offset of the virtual disk, find the cluster type and offset in - * the qcow2 file. The offset is stored in *cluster_offset. + * For a given offset of the virtual disk find the equivalent host + * offset in the qcow2 file and store it in *host_offset. Neither + * offset needs to be aligned to a cluster boundary. + * + * If the cluster is unallocated then *host_offset will be 0. + * If the cluster is compressed then *host_offset will contain the + * complete compressed cluster descriptor. * * On entry, *bytes is the maximum number of contiguous bytes starting at * offset that we are interested in. 
* * On exit, *bytes is the number of bytes starting at offset that have the same - * cluster type and (if applicable) are stored contiguously in the image file. - * Compressed clusters are always returned one by one. + * subcluster type and (if applicable) are stored contiguously in the image + * file. The subcluster type is stored in *subcluster_type. + * Compressed clusters are always processed one by one. * - * Returns the cluster type (QCOW2_CLUSTER_*) on success, -errno in error - * cases. + * Returns 0 on success, -errno in error cases. */ -int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, - unsigned int *bytes, uint64_t *cluster_offset) +int qcow2_get_host_offset(BlockDriverState *bs, uint64_t offset, + unsigned int *bytes, uint64_t *host_offset, + QCow2SubclusterType *subcluster_type) { BDRVQcow2State *s = bs->opaque; - unsigned int l2_index; - uint64_t l1_index, l2_offset, *l2_slice; - int c; + unsigned int l2_index, sc_index; + uint64_t l1_index, l2_offset, *l2_slice, l2_entry, l2_bitmap; + int sc; unsigned int offset_in_cluster; uint64_t bytes_available, bytes_needed, nb_clusters; - QCow2ClusterType type; + QCow2SubclusterType type; int ret; offset_in_cluster = offset_into_cluster(s, offset); @@ -537,19 +596,19 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, bytes_needed = bytes_available; } - *cluster_offset = 0; + *host_offset = 0; /* seek to the l2 offset in the l1 table */ l1_index = offset_to_l1_index(s, offset); if (l1_index >= s->l1_size) { - type = QCOW2_CLUSTER_UNALLOCATED; + type = QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN; goto out; } l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK; if (!l2_offset) { - type = QCOW2_CLUSTER_UNALLOCATED; + type = QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN; goto out; } @@ -570,7 +629,9 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, /* find the cluster offset for the given disk offset */ l2_index = offset_to_l2_slice_index(s, offset); - *cluster_offset = 
be64_to_cpu(l2_slice[l2_index]); + sc_index = offset_to_sc_index(s, offset); + l2_entry = get_l2_entry(s, l2_slice, l2_index); + l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index); nb_clusters = size_to_clusters(s, bytes_needed); /* bytes_needed <= *bytes + offset_in_cluster, both of which are unsigned @@ -578,9 +639,9 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, * true */ assert(nb_clusters <= INT_MAX); - type = qcow2_get_cluster_type(bs, *cluster_offset); - if (s->qcow_version < 3 && (type == QCOW2_CLUSTER_ZERO_PLAIN || - type == QCOW2_CLUSTER_ZERO_ALLOC)) { + type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, sc_index); + if (s->qcow_version < 3 && (type == QCOW2_SUBCLUSTER_ZERO_PLAIN || + type == QCOW2_SUBCLUSTER_ZERO_ALLOC)) { qcow2_signal_corruption(bs, true, -1, -1, "Zero cluster entry found" " in pre-v3 image (L2 offset: %#" PRIx64 ", L2 index: %#x)", l2_offset, l2_index); @@ -588,7 +649,9 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, goto fail; } switch (type) { - case QCOW2_CLUSTER_COMPRESSED: + case QCOW2_SUBCLUSTER_INVALID: + break; /* This is handled by count_contiguous_subclusters() below */ + case QCOW2_SUBCLUSTER_COMPRESSED: if (has_data_file(bs)) { qcow2_signal_corruption(bs, true, -1, -1, "Compressed cluster " "entry found in image with external data " @@ -597,51 +660,53 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, ret = -EIO; goto fail; } - /* Compressed clusters can only be processed one by one */ - c = 1; - *cluster_offset &= L2E_COMPRESSED_OFFSET_SIZE_MASK; + *host_offset = l2_entry & L2E_COMPRESSED_OFFSET_SIZE_MASK; break; - case QCOW2_CLUSTER_ZERO_PLAIN: - case QCOW2_CLUSTER_UNALLOCATED: - /* how many empty clusters ? 
*/ - c = count_contiguous_clusters_unallocated(bs, nb_clusters, - &l2_slice[l2_index], type); - *cluster_offset = 0; + case QCOW2_SUBCLUSTER_ZERO_PLAIN: + case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN: break; - case QCOW2_CLUSTER_ZERO_ALLOC: - case QCOW2_CLUSTER_NORMAL: - /* how many allocated clusters ? */ - c = count_contiguous_clusters(bs, nb_clusters, s->cluster_size, - &l2_slice[l2_index], QCOW_OFLAG_ZERO); - *cluster_offset &= L2E_OFFSET_MASK; - if (offset_into_cluster(s, *cluster_offset)) { + case QCOW2_SUBCLUSTER_ZERO_ALLOC: + case QCOW2_SUBCLUSTER_NORMAL: + case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC: { + uint64_t host_cluster_offset = l2_entry & L2E_OFFSET_MASK; + *host_offset = host_cluster_offset + offset_in_cluster; + if (offset_into_cluster(s, host_cluster_offset)) { qcow2_signal_corruption(bs, true, -1, -1, "Cluster allocation offset %#" PRIx64 " unaligned (L2 offset: %#" PRIx64 - ", L2 index: %#x)", *cluster_offset, + ", L2 index: %#x)", host_cluster_offset, l2_offset, l2_index); ret = -EIO; goto fail; } - if (has_data_file(bs) && *cluster_offset != offset - offset_in_cluster) - { + if (has_data_file(bs) && *host_offset != offset) { qcow2_signal_corruption(bs, true, -1, -1, "External data file host cluster offset %#" PRIx64 " does not match guest cluster " "offset: %#" PRIx64 - ", L2 index: %#x)", *cluster_offset, + ", L2 index: %#x)", host_cluster_offset, offset - offset_in_cluster, l2_index); ret = -EIO; goto fail; } break; + } default: abort(); } + sc = count_contiguous_subclusters(bs, nb_clusters, sc_index, + l2_slice, &l2_index); + if (sc < 0) { + qcow2_signal_corruption(bs, true, -1, -1, "Invalid cluster entry found " + " (L2 offset: %#" PRIx64 ", L2 index: %#x)", + l2_offset, l2_index); + ret = -EIO; + goto fail; + } qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); - bytes_available = (int64_t)c * s->cluster_size; + bytes_available = ((int64_t)sc + sc_index) << s->subcluster_bits; out: if (bytes_available > bytes_needed) { @@ -654,7 +719,9 @@ 
out: assert(bytes_available - offset_in_cluster <= UINT_MAX); *bytes = bytes_available - offset_in_cluster; - return type; + *subcluster_type = type; + + return 0; fail: qcow2_cache_put(s->l2_table_cache, (void **)&l2_slice); @@ -709,7 +776,7 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset, /* Then decrease the refcount of the old table */ if (l2_offset) { - qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t), + qcow2_free_clusters(bs, l2_offset, s->l2_size * l2_entry_size(s), QCOW2_DISCARD_OTHER); } @@ -765,7 +832,7 @@ int qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, /* Compression can't overwrite anything. Fail if the cluster was already * allocated. */ - cluster_offset = be64_to_cpu(l2_slice[l2_index]); + cluster_offset = get_l2_entry(s, l2_slice, l2_index); if (cluster_offset & L2E_OFFSET_MASK) { qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); return -EIO; @@ -794,7 +861,10 @@ int qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED); qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice); - l2_slice[l2_index] = cpu_to_be64(cluster_offset); + set_l2_entry(s, l2_slice, l2_index, cluster_offset); + if (has_subclusters(s)) { + set_l2_bitmap(s, l2_slice, l2_index, 0); + } qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); *host_offset = cluster_offset & s->cluster_offset_mask; @@ -987,14 +1057,32 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) * cluster the second one has to do RMW (which is done above by * perform_cow()), update l2 table with its cluster pointer and free * old cluster. 
This is what this loop does */ - if (l2_slice[l2_index + i] != 0) { - old_cluster[j++] = l2_slice[l2_index + i]; + if (get_l2_entry(s, l2_slice, l2_index + i) != 0) { + old_cluster[j++] = get_l2_entry(s, l2_slice, l2_index + i); } /* The offset must fit in the offset field of the L2 table entry */ assert((offset & L2E_OFFSET_MASK) == offset); - l2_slice[l2_index + i] = cpu_to_be64(offset | QCOW_OFLAG_COPIED); + set_l2_entry(s, l2_slice, l2_index + i, offset | QCOW_OFLAG_COPIED); + + /* Update bitmap with the subclusters that were just written */ + if (has_subclusters(s) && !m->prealloc) { + uint64_t l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index + i); + unsigned written_from = m->cow_start.offset; + unsigned written_to = m->cow_end.offset + m->cow_end.nb_bytes ?: + m->nb_clusters << s->cluster_bits; + int first_sc, last_sc; + /* Narrow written_from and written_to down to the current cluster */ + written_from = MAX(written_from, i << s->cluster_bits); + written_to = MIN(written_to, (i + 1) << s->cluster_bits); + assert(written_from < written_to); + first_sc = offset_to_sc_index(s, written_from); + last_sc = offset_to_sc_index(s, written_to - 1); + l2_bitmap |= QCOW_OFLAG_SUB_ALLOC_RANGE(first_sc, last_sc + 1); + l2_bitmap &= ~QCOW_OFLAG_SUB_ZERO_RANGE(first_sc, last_sc + 1); + set_l2_bitmap(s, l2_slice, l2_index + i, l2_bitmap); + } } @@ -1008,8 +1096,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) */ if (!m->keep_old_clusters && j != 0) { for (i = 0; i < j; i++) { - qcow2_free_any_clusters(bs, be64_to_cpu(old_cluster[i]), 1, - QCOW2_DISCARD_NEVER); + qcow2_free_any_clusters(bs, old_cluster[i], 1, QCOW2_DISCARD_NEVER); } } @@ -1034,36 +1121,249 @@ void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m) } /* - * Returns the number of contiguous clusters that can be used for an allocating - * write, but require COW to be performed (this includes yet unallocated space, - * which must copy from the backing file) + * For a given 
write request, create a new QCowL2Meta structure, add + * it to @m and the BDRVQcow2State.cluster_allocs list. If the write + * request does not need copy-on-write or changes to the L2 metadata + * then this function does nothing. + * + * @host_cluster_offset points to the beginning of the first cluster. + * + * @guest_offset and @bytes indicate the offset and length of the + * request. + * + * @l2_slice contains the L2 entries of all clusters involved in this + * write request. + * + * If @keep_old is true it means that the clusters were already + * allocated and will be overwritten. If false then the clusters are + * new and we have to decrease the reference count of the old ones. + * + * Returns 0 on success, -errno on failure. */ -static int count_cow_clusters(BlockDriverState *bs, int nb_clusters, - uint64_t *l2_slice, int l2_index) +static int calculate_l2_meta(BlockDriverState *bs, uint64_t host_cluster_offset, + uint64_t guest_offset, unsigned bytes, + uint64_t *l2_slice, QCowL2Meta **m, bool keep_old) { + BDRVQcow2State *s = bs->opaque; + int sc_index, l2_index = offset_to_l2_slice_index(s, guest_offset); + uint64_t l2_entry, l2_bitmap; + unsigned cow_start_from, cow_end_to; + unsigned cow_start_to = offset_into_cluster(s, guest_offset); + unsigned cow_end_from = cow_start_to + bytes; + unsigned nb_clusters = size_to_clusters(s, cow_end_from); + QCowL2Meta *old_m = *m; + QCow2SubclusterType type; int i; + bool skip_cow = keep_old; + assert(nb_clusters <= s->l2_slice_size - l2_index); + + /* Check the type of all affected subclusters */ for (i = 0; i < nb_clusters; i++) { - uint64_t l2_entry = be64_to_cpu(l2_slice[l2_index + i]); - QCow2ClusterType cluster_type = qcow2_get_cluster_type(bs, l2_entry); + l2_entry = get_l2_entry(s, l2_slice, l2_index + i); + l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index + i); + if (skip_cow) { + unsigned write_from = MAX(cow_start_to, i << s->cluster_bits); + unsigned write_to = MIN(cow_end_from, (i + 1) << s->cluster_bits); 
+ int first_sc = offset_to_sc_index(s, write_from); + int last_sc = offset_to_sc_index(s, write_to - 1); + int cnt = qcow2_get_subcluster_range_type(bs, l2_entry, l2_bitmap, + first_sc, &type); + /* Is any of the subclusters of type != QCOW2_SUBCLUSTER_NORMAL ? */ + if (type != QCOW2_SUBCLUSTER_NORMAL || first_sc + cnt <= last_sc) { + skip_cow = false; + } + } else { + /* If we can't skip the cow we can still look for invalid entries */ + type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, 0); + } + if (type == QCOW2_SUBCLUSTER_INVALID) { + int l1_index = offset_to_l1_index(s, guest_offset); + uint64_t l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK; + qcow2_signal_corruption(bs, true, -1, -1, "Invalid cluster " + "entry found (L2 offset: %#" PRIx64 + ", L2 index: %#x)", + l2_offset, l2_index + i); + return -EIO; + } + } + + if (skip_cow) { + return 0; + } + + /* Get the L2 entry of the first cluster */ + l2_entry = get_l2_entry(s, l2_slice, l2_index); + l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index); + sc_index = offset_to_sc_index(s, guest_offset); + type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, sc_index); - switch(cluster_type) { - case QCOW2_CLUSTER_NORMAL: - if (l2_entry & QCOW_OFLAG_COPIED) { - goto out; + if (!keep_old) { + switch (type) { + case QCOW2_SUBCLUSTER_COMPRESSED: + cow_start_from = 0; + break; + case QCOW2_SUBCLUSTER_NORMAL: + case QCOW2_SUBCLUSTER_ZERO_ALLOC: + case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC: + if (has_subclusters(s)) { + /* Skip all leading zero and unallocated subclusters */ + uint32_t alloc_bitmap = l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC; + cow_start_from = + MIN(sc_index, ctz32(alloc_bitmap)) << s->subcluster_bits; + } else { + cow_start_from = 0; } break; - case QCOW2_CLUSTER_UNALLOCATED: - case QCOW2_CLUSTER_COMPRESSED: - case QCOW2_CLUSTER_ZERO_PLAIN: - case QCOW2_CLUSTER_ZERO_ALLOC: + case QCOW2_SUBCLUSTER_ZERO_PLAIN: + case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN: + cow_start_from = sc_index << 
s->subcluster_bits; + break; + default: + g_assert_not_reached(); + } + } else { + switch (type) { + case QCOW2_SUBCLUSTER_NORMAL: + cow_start_from = cow_start_to; + break; + case QCOW2_SUBCLUSTER_ZERO_ALLOC: + case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC: + cow_start_from = sc_index << s->subcluster_bits; break; default: - abort(); + g_assert_not_reached(); + } + } + + /* Get the L2 entry of the last cluster */ + l2_index += nb_clusters - 1; + l2_entry = get_l2_entry(s, l2_slice, l2_index); + l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index); + sc_index = offset_to_sc_index(s, guest_offset + bytes - 1); + type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, sc_index); + + if (!keep_old) { + switch (type) { + case QCOW2_SUBCLUSTER_COMPRESSED: + cow_end_to = ROUND_UP(cow_end_from, s->cluster_size); + break; + case QCOW2_SUBCLUSTER_NORMAL: + case QCOW2_SUBCLUSTER_ZERO_ALLOC: + case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC: + cow_end_to = ROUND_UP(cow_end_from, s->cluster_size); + if (has_subclusters(s)) { + /* Skip all trailing zero and unallocated subclusters */ + uint32_t alloc_bitmap = l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC; + cow_end_to -= + MIN(s->subclusters_per_cluster - sc_index - 1, + clz32(alloc_bitmap)) << s->subcluster_bits; + } + break; + case QCOW2_SUBCLUSTER_ZERO_PLAIN: + case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN: + cow_end_to = ROUND_UP(cow_end_from, s->subcluster_size); + break; + default: + g_assert_not_reached(); + } + } else { + switch (type) { + case QCOW2_SUBCLUSTER_NORMAL: + cow_end_to = cow_end_from; + break; + case QCOW2_SUBCLUSTER_ZERO_ALLOC: + case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC: + cow_end_to = ROUND_UP(cow_end_from, s->subcluster_size); + break; + default: + g_assert_not_reached(); + } + } + + *m = g_malloc0(sizeof(**m)); + **m = (QCowL2Meta) { + .next = old_m, + + .alloc_offset = host_cluster_offset, + .offset = start_of_cluster(s, guest_offset), + .nb_clusters = nb_clusters, + + .keep_old_clusters = keep_old, + + .cow_start = { + .offset = 
cow_start_from, + .nb_bytes = cow_start_to - cow_start_from, + }, + .cow_end = { + .offset = cow_end_from, + .nb_bytes = cow_end_to - cow_end_from, + }, + }; + + qemu_co_queue_init(&(*m)->dependent_requests); + QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight); + + return 0; +} + +/* + * Returns true if writing to the cluster pointed to by @l2_entry + * requires a new allocation (that is, if the cluster is unallocated + * or has refcount > 1 and therefore cannot be written in-place). + */ +static bool cluster_needs_new_alloc(BlockDriverState *bs, uint64_t l2_entry) +{ + switch (qcow2_get_cluster_type(bs, l2_entry)) { + case QCOW2_CLUSTER_NORMAL: + case QCOW2_CLUSTER_ZERO_ALLOC: + if (l2_entry & QCOW_OFLAG_COPIED) { + return false; + } + case QCOW2_CLUSTER_UNALLOCATED: + case QCOW2_CLUSTER_COMPRESSED: + case QCOW2_CLUSTER_ZERO_PLAIN: + return true; + default: + abort(); + } +} + +/* + * Returns the number of contiguous clusters that can be written to + * using one single write request, starting from @l2_index. + * At most @nb_clusters are checked. + * + * If @new_alloc is true this counts clusters that are either + * unallocated, or allocated but with refcount > 1 (so they need to be + * newly allocated and COWed). + * + * If @new_alloc is false this counts clusters that are already + * allocated and can be overwritten in-place (this includes clusters + * of type QCOW2_CLUSTER_ZERO_ALLOC). 
+ */ +static int count_single_write_clusters(BlockDriverState *bs, int nb_clusters, + uint64_t *l2_slice, int l2_index, + bool new_alloc) +{ + BDRVQcow2State *s = bs->opaque; + uint64_t l2_entry = get_l2_entry(s, l2_slice, l2_index); + uint64_t expected_offset = l2_entry & L2E_OFFSET_MASK; + int i; + + for (i = 0; i < nb_clusters; i++) { + l2_entry = get_l2_entry(s, l2_slice, l2_index + i); + if (cluster_needs_new_alloc(bs, l2_entry) != new_alloc) { + break; + } + if (!new_alloc) { + if (expected_offset != (l2_entry & L2E_OFFSET_MASK)) { + break; + } + expected_offset += s->cluster_size; } } -out: assert(i <= nb_clusters); return i; } @@ -1093,8 +1393,8 @@ static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset, uint64_t start = guest_offset; uint64_t end = start + bytes; - uint64_t old_start = l2meta_cow_start(old_alloc); - uint64_t old_end = l2meta_cow_end(old_alloc); + uint64_t old_start = start_of_cluster(s, l2meta_cow_start(old_alloc)); + uint64_t old_end = ROUND_UP(l2meta_cow_end(old_alloc), s->cluster_size); if (end <= old_start || start >= old_end) { /* No intersection */ @@ -1132,10 +1432,10 @@ static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset, } /* - * Checks how many already allocated clusters that don't require a copy on - * write there are at the given guest_offset (up to *bytes). If *host_offset is - * not INV_OFFSET, only physically contiguous clusters beginning at this host - * offset are counted. + * Checks how many already allocated clusters that don't require a new + * allocation there are at the given guest_offset (up to *bytes). + * If *host_offset is not INV_OFFSET, only physically contiguous clusters + * beginning at this host offset are counted. * * Note that guest_offset may not be cluster aligned. 
In this case, the * returned *host_offset points to exact byte referenced by guest_offset and @@ -1144,12 +1444,12 @@ static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset, * Returns: * 0: if no allocated clusters are available at the given offset. * *bytes is normally unchanged. It is set to 0 if the cluster - * is allocated and doesn't need COW, but doesn't have the right - * physical offset. + * is allocated and can be overwritten in-place but doesn't have + * the right physical offset. * - * 1: if allocated clusters that don't require a COW are available at - * the requested offset. *bytes may have decreased and describes - * the length of the area that can be written to. + * 1: if allocated clusters that can be overwritten in place are + * available at the requested offset. *bytes may have decreased + * and describes the length of the area that can be written to. * * -errno: in error cases */ @@ -1158,7 +1458,7 @@ static int handle_copied(BlockDriverState *bs, uint64_t guest_offset, { BDRVQcow2State *s = bs->opaque; int l2_index; - uint64_t cluster_offset; + uint64_t l2_entry, cluster_offset; uint64_t *l2_slice; uint64_t nb_clusters; unsigned int keep_clusters; @@ -1179,7 +1479,8 @@ static int handle_copied(BlockDriverState *bs, uint64_t guest_offset, l2_index = offset_to_l2_slice_index(s, guest_offset); nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index); - assert(nb_clusters <= INT_MAX); + /* Limit total byte count to BDRV_REQUEST_MAX_BYTES */ + nb_clusters = MIN(nb_clusters, BDRV_REQUEST_MAX_BYTES >> s->cluster_bits); /* Find L2 entry for the first involved cluster */ ret = get_cluster_table(bs, guest_offset, &l2_slice, &l2_index); @@ -1187,41 +1488,42 @@ static int handle_copied(BlockDriverState *bs, uint64_t guest_offset, return ret; } - cluster_offset = be64_to_cpu(l2_slice[l2_index]); + l2_entry = get_l2_entry(s, l2_slice, l2_index); + cluster_offset = l2_entry & L2E_OFFSET_MASK; - /* Check how many clusters are already 
allocated and don't need COW */ - if (qcow2_get_cluster_type(bs, cluster_offset) == QCOW2_CLUSTER_NORMAL - && (cluster_offset & QCOW_OFLAG_COPIED)) - { - /* If a specific host_offset is required, check it */ - bool offset_matches = - (cluster_offset & L2E_OFFSET_MASK) == *host_offset; - - if (offset_into_cluster(s, cluster_offset & L2E_OFFSET_MASK)) { - qcow2_signal_corruption(bs, true, -1, -1, "Data cluster offset " - "%#llx unaligned (guest offset: %#" PRIx64 - ")", cluster_offset & L2E_OFFSET_MASK, - guest_offset); + if (!cluster_needs_new_alloc(bs, l2_entry)) { + if (offset_into_cluster(s, cluster_offset)) { + qcow2_signal_corruption(bs, true, -1, -1, "%s cluster offset " + "%#" PRIx64 " unaligned (guest offset: %#" + PRIx64 ")", l2_entry & QCOW_OFLAG_ZERO ? + "Preallocated zero" : "Data", + cluster_offset, guest_offset); ret = -EIO; goto out; } - if (*host_offset != INV_OFFSET && !offset_matches) { + /* If a specific host_offset is required, check it */ + if (*host_offset != INV_OFFSET && cluster_offset != *host_offset) { *bytes = 0; ret = 0; goto out; } /* We keep all QCOW_OFLAG_COPIED clusters */ - keep_clusters = - count_contiguous_clusters(bs, nb_clusters, s->cluster_size, - &l2_slice[l2_index], - QCOW_OFLAG_COPIED | QCOW_OFLAG_ZERO); + keep_clusters = count_single_write_clusters(bs, nb_clusters, l2_slice, + l2_index, false); assert(keep_clusters <= nb_clusters); *bytes = MIN(*bytes, keep_clusters * s->cluster_size - offset_into_cluster(s, guest_offset)); + assert(*bytes != 0); + + ret = calculate_l2_meta(bs, cluster_offset, guest_offset, + *bytes, l2_slice, m, true); + if (ret < 0) { + goto out; + } ret = 1; } else { @@ -1235,8 +1537,7 @@ out: /* Only return a host offset if we actually made progress. 
Otherwise we * would make requirements for handle_alloc() that it can't fulfill */ if (ret > 0) { - *host_offset = (cluster_offset & L2E_OFFSET_MASK) - + offset_into_cluster(s, guest_offset); + *host_offset = cluster_offset + offset_into_cluster(s, guest_offset); } return ret; @@ -1297,9 +1598,10 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, } /* - * Allocates new clusters for an area that either is yet unallocated or needs a - * copy on write. If *host_offset is not INV_OFFSET, clusters are only - * allocated if the new allocation can match the specified host offset. + * Allocates new clusters for an area that is either still unallocated or + * cannot be overwritten in-place. If *host_offset is not INV_OFFSET, + * clusters are only allocated if the new allocation can match the specified + * host offset. * * Note that guest_offset may not be cluster aligned. In this case, the * returned *host_offset points to exact byte referenced by guest_offset and @@ -1322,12 +1624,10 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset, BDRVQcow2State *s = bs->opaque; int l2_index; uint64_t *l2_slice; - uint64_t entry; uint64_t nb_clusters; int ret; - bool keep_old_clusters = false; - uint64_t alloc_cluster_offset = INV_OFFSET; + uint64_t alloc_cluster_offset; trace_qcow2_handle_alloc(qemu_coroutine_self(), guest_offset, *host_offset, *bytes); @@ -1342,10 +1642,8 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset, l2_index = offset_to_l2_slice_index(s, guest_offset); nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index); - assert(nb_clusters <= INT_MAX); - - /* Limit total allocation byte count to INT_MAX */ - nb_clusters = MIN(nb_clusters, INT_MAX >> s->cluster_bits); + /* Limit total allocation byte count to BDRV_REQUEST_MAX_BYTES */ + nb_clusters = MIN(nb_clusters, BDRV_REQUEST_MAX_BYTES >> s->cluster_bits); /* Find L2 entry for the first involved cluster */ ret = get_cluster_table(bs, 
guest_offset, &l2_slice, &l2_index); @@ -1353,67 +1651,32 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset, return ret; } - entry = be64_to_cpu(l2_slice[l2_index]); - nb_clusters = count_cow_clusters(bs, nb_clusters, l2_slice, l2_index); + nb_clusters = count_single_write_clusters(bs, nb_clusters, + l2_slice, l2_index, true); /* This function is only called when there were no non-COW clusters, so if * we can't find any unallocated or COW clusters either, something is * wrong with our code. */ assert(nb_clusters > 0); - if (qcow2_get_cluster_type(bs, entry) == QCOW2_CLUSTER_ZERO_ALLOC && - (entry & QCOW_OFLAG_COPIED) && - (*host_offset == INV_OFFSET || - start_of_cluster(s, *host_offset) == (entry & L2E_OFFSET_MASK))) - { - int preallocated_nb_clusters; - - if (offset_into_cluster(s, entry & L2E_OFFSET_MASK)) { - qcow2_signal_corruption(bs, true, -1, -1, "Preallocated zero " - "cluster offset %#llx unaligned (guest " - "offset: %#" PRIx64 ")", - entry & L2E_OFFSET_MASK, guest_offset); - ret = -EIO; - goto fail; - } - - /* Try to reuse preallocated zero clusters; contiguous normal clusters - * would be fine, too, but count_cow_clusters() above has limited - * nb_clusters already to a range of COW clusters */ - preallocated_nb_clusters = - count_contiguous_clusters(bs, nb_clusters, s->cluster_size, - &l2_slice[l2_index], QCOW_OFLAG_COPIED); - assert(preallocated_nb_clusters > 0); - - nb_clusters = preallocated_nb_clusters; - alloc_cluster_offset = entry & L2E_OFFSET_MASK; - - /* We want to reuse these clusters, so qcow2_alloc_cluster_link_l2() - * should not free them. */ - keep_old_clusters = true; + /* Allocate at a given offset in the image file */ + alloc_cluster_offset = *host_offset == INV_OFFSET ? 
INV_OFFSET : + start_of_cluster(s, *host_offset); + ret = do_alloc_cluster_offset(bs, guest_offset, &alloc_cluster_offset, + &nb_clusters); + if (ret < 0) { + goto out; } - qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); - - if (alloc_cluster_offset == INV_OFFSET) { - /* Allocate, if necessary at a given offset in the image file */ - alloc_cluster_offset = *host_offset == INV_OFFSET ? INV_OFFSET : - start_of_cluster(s, *host_offset); - ret = do_alloc_cluster_offset(bs, guest_offset, &alloc_cluster_offset, - &nb_clusters); - if (ret < 0) { - goto fail; - } - - /* Can't extend contiguous allocation */ - if (nb_clusters == 0) { - *bytes = 0; - return 0; - } - - assert(alloc_cluster_offset != INV_OFFSET); + /* Can't extend contiguous allocation */ + if (nb_clusters == 0) { + *bytes = 0; + ret = 0; + goto out; } + assert(alloc_cluster_offset != INV_OFFSET); + /* * Save info needed for meta data update. * @@ -1431,39 +1694,22 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset, uint64_t requested_bytes = *bytes + offset_into_cluster(s, guest_offset); int avail_bytes = nb_clusters << s->cluster_bits; int nb_bytes = MIN(requested_bytes, avail_bytes); - QCowL2Meta *old_m = *m; - - *m = g_malloc0(sizeof(**m)); - - **m = (QCowL2Meta) { - .next = old_m, - - .alloc_offset = alloc_cluster_offset, - .offset = start_of_cluster(s, guest_offset), - .nb_clusters = nb_clusters, - - .keep_old_clusters = keep_old_clusters, - - .cow_start = { - .offset = 0, - .nb_bytes = offset_into_cluster(s, guest_offset), - }, - .cow_end = { - .offset = nb_bytes, - .nb_bytes = avail_bytes - nb_bytes, - }, - }; - qemu_co_queue_init(&(*m)->dependent_requests); - QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight); *host_offset = alloc_cluster_offset + offset_into_cluster(s, guest_offset); *bytes = MIN(*bytes, nb_bytes - offset_into_cluster(s, guest_offset)); assert(*bytes != 0); - return 1; + ret = calculate_l2_meta(bs, alloc_cluster_offset, guest_offset, *bytes, + 
l2_slice, m, false); + if (ret < 0) { + goto out; + } -fail: - if (*m && (*m)->nb_clusters > 0) { + ret = 1; + +out: + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); + if (ret < 0 && *m && (*m)->nb_clusters > 0) { QLIST_REMOVE(*m, next_in_flight); } return ret; @@ -1623,11 +1869,17 @@ static int discard_in_l2_slice(BlockDriverState *bs, uint64_t offset, assert(nb_clusters <= INT_MAX); for (i = 0; i < nb_clusters; i++) { - uint64_t old_l2_entry; - - old_l2_entry = be64_to_cpu(l2_slice[l2_index + i]); + uint64_t old_l2_entry = get_l2_entry(s, l2_slice, l2_index + i); + uint64_t old_l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index + i); + uint64_t new_l2_entry = old_l2_entry; + uint64_t new_l2_bitmap = old_l2_bitmap; + QCow2ClusterType cluster_type = + qcow2_get_cluster_type(bs, old_l2_entry); /* + * If full_discard is true, the cluster should not read back as zeroes, + * but rather fall through to the backing file. + * * If full_discard is false, make sure that a discarded area reads back * as zeroes for v3 images (we cannot do it for v2 without actually * writing a zero-filled buffer). We can skip the operation if the @@ -1636,40 +1888,28 @@ static int discard_in_l2_slice(BlockDriverState *bs, uint64_t offset, * * TODO We might want to use bdrv_block_status(bs) here, but we're * holding s->lock, so that doesn't work today. - * - * If full_discard is true, the sector should not read back as zeroes, - * but rather fall through to the backing file. */ - switch (qcow2_get_cluster_type(bs, old_l2_entry)) { - case QCOW2_CLUSTER_UNALLOCATED: - if (full_discard || !bs->backing) { - continue; - } - break; - - case QCOW2_CLUSTER_ZERO_PLAIN: - if (!full_discard) { - continue; + if (full_discard) { + new_l2_entry = new_l2_bitmap = 0; + } else if (bs->backing || qcow2_cluster_is_allocated(cluster_type)) { + if (has_subclusters(s)) { + new_l2_entry = 0; + new_l2_bitmap = QCOW_L2_BITMAP_ALL_ZEROES; + } else { + new_l2_entry = s->qcow_version >= 3 ? 
QCOW_OFLAG_ZERO : 0; } - break; - - case QCOW2_CLUSTER_ZERO_ALLOC: - case QCOW2_CLUSTER_NORMAL: - case QCOW2_CLUSTER_COMPRESSED: - break; + } - default: - abort(); + if (old_l2_entry == new_l2_entry && old_l2_bitmap == new_l2_bitmap) { + continue; } /* First remove L2 entries */ qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice); - if (!full_discard && s->qcow_version >= 3) { - l2_slice[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO); - } else { - l2_slice[l2_index + i] = cpu_to_be64(0); + set_l2_entry(s, l2_slice, l2_index + i, new_l2_entry); + if (has_subclusters(s)) { + set_l2_bitmap(s, l2_slice, l2_index + i, new_l2_bitmap); } - /* Then decrease the refcount */ qcow2_free_any_clusters(bs, old_l2_entry, 1, type); } @@ -1732,7 +1972,6 @@ static int zero_in_l2_slice(BlockDriverState *bs, uint64_t offset, int l2_index; int ret; int i; - bool unmap = !!(flags & BDRV_REQ_MAY_UNMAP); ret = get_cluster_table(bs, offset, &l2_slice, &l2_index); if (ret < 0) { @@ -1744,27 +1983,31 @@ static int zero_in_l2_slice(BlockDriverState *bs, uint64_t offset, assert(nb_clusters <= INT_MAX); for (i = 0; i < nb_clusters; i++) { - uint64_t old_offset; - QCow2ClusterType cluster_type; - - old_offset = be64_to_cpu(l2_slice[l2_index + i]); + uint64_t old_l2_entry = get_l2_entry(s, l2_slice, l2_index + i); + uint64_t old_l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index + i); + QCow2ClusterType type = qcow2_get_cluster_type(bs, old_l2_entry); + bool unmap = (type == QCOW2_CLUSTER_COMPRESSED) || + ((flags & BDRV_REQ_MAY_UNMAP) && qcow2_cluster_is_allocated(type)); + uint64_t new_l2_entry = unmap ? 0 : old_l2_entry; + uint64_t new_l2_bitmap = old_l2_bitmap; + + if (has_subclusters(s)) { + new_l2_bitmap = QCOW_L2_BITMAP_ALL_ZEROES; + } else { + new_l2_entry |= QCOW_OFLAG_ZERO; + } - /* - * Minimize L2 changes if the cluster already reads back as - * zeroes with correct allocation. 
- */ - cluster_type = qcow2_get_cluster_type(bs, old_offset); - if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN || - (cluster_type == QCOW2_CLUSTER_ZERO_ALLOC && !unmap)) { + if (old_l2_entry == new_l2_entry && old_l2_bitmap == new_l2_bitmap) { continue; } qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice); - if (cluster_type == QCOW2_CLUSTER_COMPRESSED || unmap) { - l2_slice[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO); - qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST); - } else { - l2_slice[l2_index + i] |= cpu_to_be64(QCOW_OFLAG_ZERO); + if (unmap) { + qcow2_free_any_clusters(bs, old_l2_entry, 1, QCOW2_DISCARD_REQUEST); + } + set_l2_entry(s, l2_slice, l2_index + i, new_l2_entry); + if (has_subclusters(s)) { + set_l2_bitmap(s, l2_slice, l2_index + i, new_l2_bitmap); } } @@ -1773,12 +2016,59 @@ static int zero_in_l2_slice(BlockDriverState *bs, uint64_t offset, return nb_clusters; } -int qcow2_cluster_zeroize(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, int flags) +static int zero_l2_subclusters(BlockDriverState *bs, uint64_t offset, + unsigned nb_subclusters) +{ + BDRVQcow2State *s = bs->opaque; + uint64_t *l2_slice; + uint64_t old_l2_bitmap, l2_bitmap; + int l2_index, ret, sc = offset_to_sc_index(s, offset); + + /* For full clusters use zero_in_l2_slice() instead */ + assert(nb_subclusters > 0 && nb_subclusters < s->subclusters_per_cluster); + assert(sc + nb_subclusters <= s->subclusters_per_cluster); + assert(offset_into_subcluster(s, offset) == 0); + + ret = get_cluster_table(bs, offset, &l2_slice, &l2_index); + if (ret < 0) { + return ret; + } + + switch (qcow2_get_cluster_type(bs, get_l2_entry(s, l2_slice, l2_index))) { + case QCOW2_CLUSTER_COMPRESSED: + ret = -ENOTSUP; /* We cannot partially zeroize compressed clusters */ + goto out; + case QCOW2_CLUSTER_NORMAL: + case QCOW2_CLUSTER_UNALLOCATED: + break; + default: + g_assert_not_reached(); + } + + old_l2_bitmap = l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index); + + 
l2_bitmap |= QCOW_OFLAG_SUB_ZERO_RANGE(sc, sc + nb_subclusters); + l2_bitmap &= ~QCOW_OFLAG_SUB_ALLOC_RANGE(sc, sc + nb_subclusters); + + if (old_l2_bitmap != l2_bitmap) { + set_l2_bitmap(s, l2_slice, l2_index, l2_bitmap); + qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice); + } + + ret = 0; +out: + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); + + return ret; +} + +int qcow2_subcluster_zeroize(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, int flags) { BDRVQcow2State *s = bs->opaque; uint64_t end_offset = offset + bytes; uint64_t nb_clusters; + unsigned head, tail; int64_t cleared; int ret; @@ -1793,8 +2083,8 @@ int qcow2_cluster_zeroize(BlockDriverState *bs, uint64_t offset, } /* Caller must pass aligned values, except at image end */ - assert(QEMU_IS_ALIGNED(offset, s->cluster_size)); - assert(QEMU_IS_ALIGNED(end_offset, s->cluster_size) || + assert(offset_into_subcluster(s, offset) == 0); + assert(offset_into_subcluster(s, end_offset) == 0 || end_offset >= bs->total_sectors << BDRV_SECTOR_BITS); /* @@ -1809,11 +2099,26 @@ int qcow2_cluster_zeroize(BlockDriverState *bs, uint64_t offset, return -ENOTSUP; } - /* Each L2 slice is handled by its own loop iteration */ - nb_clusters = size_to_clusters(s, bytes); + head = MIN(end_offset, ROUND_UP(offset, s->cluster_size)) - offset; + offset += head; + + tail = (end_offset >= bs->total_sectors << BDRV_SECTOR_BITS) ? 
0 : + end_offset - MAX(offset, start_of_cluster(s, end_offset)); + end_offset -= tail; s->cache_discards = true; + if (head) { + ret = zero_l2_subclusters(bs, offset - head, + size_to_subclusters(s, head)); + if (ret < 0) { + goto fail; + } + } + + /* Each L2 slice is handled by its own loop iteration */ + nb_clusters = size_to_clusters(s, end_offset - offset); + while (nb_clusters > 0) { cleared = zero_in_l2_slice(bs, offset, nb_clusters, flags); if (cleared < 0) { @@ -1825,6 +2130,13 @@ int qcow2_cluster_zeroize(BlockDriverState *bs, uint64_t offset, offset += (cleared * s->cluster_size); } + if (tail) { + ret = zero_l2_subclusters(bs, end_offset, size_to_subclusters(s, tail)); + if (ret < 0) { + goto fail; + } + } + ret = 0; fail: s->cache_discards = false; @@ -1854,7 +2166,10 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, int ret; int i, j; - slice_size2 = s->l2_slice_size * sizeof(uint64_t); + /* qcow2_downgrade() is not allowed in images with subclusters */ + assert(!has_subclusters(s)); + + slice_size2 = s->l2_slice_size * l2_entry_size(s); n_slices = s->cluster_size / slice_size2; if (!is_active_l1) { @@ -1909,7 +2224,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, } for (j = 0; j < s->l2_slice_size; j++) { - uint64_t l2_entry = be64_to_cpu(l2_slice[j]); + uint64_t l2_entry = get_l2_entry(s, l2_slice, j); int64_t offset = l2_entry & L2E_OFFSET_MASK; QCow2ClusterType cluster_type = qcow2_get_cluster_type(bs, l2_entry); @@ -1921,9 +2236,12 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) { if (!bs->backing) { - /* not backed; therefore we can simply deallocate the - * cluster */ - l2_slice[j] = 0; + /* + * not backed; therefore we can simply deallocate the + * cluster. No need to call set_l2_bitmap(), this + * function doesn't support images with subclusters. 
+ */ + set_l2_entry(s, l2_slice, j, 0); l2_dirty = true; continue; } @@ -1989,10 +2307,14 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, } if (l2_refcount == 1) { - l2_slice[j] = cpu_to_be64(offset | QCOW_OFLAG_COPIED); + set_l2_entry(s, l2_slice, j, offset | QCOW_OFLAG_COPIED); } else { - l2_slice[j] = cpu_to_be64(offset); + set_l2_entry(s, l2_slice, j, offset); } + /* + * No need to call set_l2_bitmap() after set_l2_entry() because + * this function doesn't support images with subclusters. + */ l2_dirty = true; } diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 0457a6060d..aae52607eb 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -1254,7 +1254,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, l2_slice = NULL; l1_table = NULL; l1_size2 = l1_size * sizeof(uint64_t); - slice_size2 = s->l2_slice_size * sizeof(uint64_t); + slice_size2 = s->l2_slice_size * l2_entry_size(s); n_slices = s->cluster_size / slice_size2; s->cache_discards = true; @@ -1310,7 +1310,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, uint64_t cluster_index; uint64_t offset; - entry = be64_to_cpu(l2_slice[j]); + entry = get_l2_entry(s, l2_slice, j); old_entry = entry; entry &= ~QCOW_OFLAG_COPIED; offset = entry & L2E_OFFSET_MASK; @@ -1384,7 +1384,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, qcow2_cache_set_dependency(bs, s->l2_table_cache, s->refcount_block_cache); } - l2_slice[j] = cpu_to_be64(entry); + set_l2_entry(s, l2_slice, j, entry); qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice); } @@ -1605,7 +1605,7 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, int i, l2_size, nb_csectors, ret; /* Read L2 table from disk */ - l2_size = s->l2_size * sizeof(uint64_t); + l2_size = s->l2_size * l2_entry_size(s); l2_table = g_malloc(l2_size); ret = bdrv_pread(bs->file, l2_offset, l2_table, l2_size); @@ -1617,7 +1617,7 @@ static int 
check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, /* Do the actual checks */ for(i = 0; i < s->l2_size; i++) { - l2_entry = be64_to_cpu(l2_table[i]); + l2_entry = get_l2_entry(s, l2_table, i); switch (qcow2_get_cluster_type(bs, l2_entry)) { case QCOW2_CLUSTER_COMPRESSED: @@ -1669,26 +1669,33 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, /* Correct offsets are cluster aligned */ if (offset_into_cluster(s, offset)) { + bool contains_data; res->corruptions++; - if (qcow2_get_cluster_type(bs, l2_entry) == - QCOW2_CLUSTER_ZERO_ALLOC) - { - fprintf(stderr, "%s offset=%" PRIx64 ": Preallocated zero " + if (has_subclusters(s)) { + uint64_t l2_bitmap = get_l2_bitmap(s, l2_table, i); + contains_data = (l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC); + } else { + contains_data = !(l2_entry & QCOW_OFLAG_ZERO); + } + + if (!contains_data) { + fprintf(stderr, "%s offset=%" PRIx64 ": Preallocated " "cluster is not properly aligned; L2 entry " "corrupted.\n", fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", offset); if (fix & BDRV_FIX_ERRORS) { + int idx = i * (l2_entry_size(s) / sizeof(uint64_t)); uint64_t l2e_offset = - l2_offset + (uint64_t)i * sizeof(uint64_t); + l2_offset + (uint64_t)i * l2_entry_size(s); int ign = active ? QCOW2_OL_ACTIVE_L2 : QCOW2_OL_INACTIVE_L2; - l2_entry = QCOW_OFLAG_ZERO; - l2_table[i] = cpu_to_be64(l2_entry); + l2_entry = has_subclusters(s) ? 
0 : QCOW_OFLAG_ZERO; + set_l2_entry(s, l2_table, i, l2_entry); ret = qcow2_pre_write_overlap_check(bs, ign, - l2e_offset, sizeof(uint64_t), false); + l2e_offset, l2_entry_size(s), false); if (ret < 0) { fprintf(stderr, "ERROR: Overlap check failed\n"); res->check_errors++; @@ -1698,7 +1705,8 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, } ret = bdrv_pwrite_sync(bs->file, l2e_offset, - &l2_table[i], sizeof(uint64_t)); + &l2_table[idx], + l2_entry_size(s)); if (ret < 0) { fprintf(stderr, "ERROR: Failed to overwrite L2 " "table entry: %s\n", strerror(-ret)); @@ -1905,7 +1913,7 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res, } ret = bdrv_pread(bs->file, l2_offset, l2_table, - s->l2_size * sizeof(uint64_t)); + s->l2_size * l2_entry_size(s)); if (ret < 0) { fprintf(stderr, "ERROR: Could not read L2 table: %s\n", strerror(-ret)); @@ -1914,7 +1922,7 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res, } for (j = 0; j < s->l2_size; j++) { - uint64_t l2_entry = be64_to_cpu(l2_table[j]); + uint64_t l2_entry = get_l2_entry(s, l2_table, j); uint64_t data_offset = l2_entry & L2E_OFFSET_MASK; QCow2ClusterType cluster_type = qcow2_get_cluster_type(bs, l2_entry); @@ -1937,9 +1945,10 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res, "l2_entry=%" PRIx64 " refcount=%" PRIu64 "\n", repair ? "Repairing" : "ERROR", l2_entry, refcount); if (repair) { - l2_table[j] = cpu_to_be64(refcount == 1 - ? l2_entry | QCOW_OFLAG_COPIED - : l2_entry & ~QCOW_OFLAG_COPIED); + set_l2_entry(s, l2_table, j, + refcount == 1 ? 
+ l2_entry | QCOW_OFLAG_COPIED : + l2_entry & ~QCOW_OFLAG_COPIED); l2_dirty++; } } diff --git a/block/qcow2.c b/block/qcow2.c index 6ad6bdc166..da56b1a4df 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -74,7 +74,7 @@ typedef struct { static int coroutine_fn qcow2_co_preadv_compressed(BlockDriverState *bs, - uint64_t file_cluster_offset, + uint64_t cluster_descriptor, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, @@ -883,7 +883,7 @@ static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts, uint64_t max_l2_entries = DIV_ROUND_UP(virtual_disk_size, s->cluster_size); /* An L2 table is always one cluster in size so the max cache size * should be a multiple of the cluster size. */ - uint64_t max_l2_cache = ROUND_UP(max_l2_entries * sizeof(uint64_t), + uint64_t max_l2_cache = ROUND_UP(max_l2_entries * l2_entry_size(s), s->cluster_size); combined_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_CACHE_SIZE); @@ -1042,7 +1042,7 @@ static int qcow2_update_options_prepare(BlockDriverState *bs, } } - r->l2_slice_size = l2_cache_entry_size / sizeof(uint64_t); + r->l2_slice_size = l2_cache_entry_size / l2_entry_size(s); r->l2_table_cache = qcow2_cache_create(bs, l2_cache_size, l2_cache_entry_size); r->refcount_block_cache = qcow2_cache_create(bs, refcount_cache_size, @@ -1444,6 +1444,17 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, } } + s->subclusters_per_cluster = + has_subclusters(s) ? 
QCOW_EXTL2_SUBCLUSTERS_PER_CLUSTER : 1; + s->subcluster_size = s->cluster_size / s->subclusters_per_cluster; + s->subcluster_bits = ctz32(s->subcluster_size); + + if (s->subcluster_size < (1 << MIN_CLUSTER_BITS)) { + error_setg(errp, "Unsupported subcluster size: %d", s->subcluster_size); + ret = -EINVAL; + goto fail; + } + /* Check support for various header values */ if (header.refcount_order > 6) { error_setg(errp, "Reference count entry width too large; may not " @@ -1484,7 +1495,7 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, bs->encrypted = true; } - s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */ + s->l2_bits = s->cluster_bits - ctz32(l2_entry_size(s)); s->l2_size = 1 << s->l2_bits; /* 2^(s->refcount_order - 3) is the refcount width in bytes */ s->refcount_block_bits = s->cluster_bits - (s->refcount_order - 3); @@ -1908,7 +1919,7 @@ static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp) /* Encryption works on a sector granularity */ bs->bl.request_alignment = qcrypto_block_get_sector_size(s->crypto); } - bs->bl.pwrite_zeroes_alignment = s->cluster_size; + bs->bl.pwrite_zeroes_alignment = s->subcluster_size; bs->bl.pdiscard_alignment = s->cluster_size; } @@ -2036,8 +2047,9 @@ static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs, BlockDriverState **file) { BDRVQcow2State *s = bs->opaque; - uint64_t cluster_offset; + uint64_t host_offset; unsigned int bytes; + QCow2SubclusterType type; int ret, status = 0; qemu_co_mutex_lock(&s->lock); @@ -2049,7 +2061,7 @@ static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs, } bytes = MIN(INT_MAX, count); - ret = qcow2_get_cluster_offset(bs, offset, &bytes, &cluster_offset); + ret = qcow2_get_host_offset(bs, offset, &bytes, &host_offset, &type); qemu_co_mutex_unlock(&s->lock); if (ret < 0) { return ret; @@ -2057,15 +2069,18 @@ static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs, *pnum = bytes; - if ((ret == 
QCOW2_CLUSTER_NORMAL || ret == QCOW2_CLUSTER_ZERO_ALLOC) && - !s->crypto) { - *map = cluster_offset | offset_into_cluster(s, offset); + if ((type == QCOW2_SUBCLUSTER_NORMAL || + type == QCOW2_SUBCLUSTER_ZERO_ALLOC || + type == QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC) && !s->crypto) { + *map = host_offset; *file = s->data_file->bs; status |= BDRV_BLOCK_OFFSET_VALID; } - if (ret == QCOW2_CLUSTER_ZERO_PLAIN || ret == QCOW2_CLUSTER_ZERO_ALLOC) { + if (type == QCOW2_SUBCLUSTER_ZERO_PLAIN || + type == QCOW2_SUBCLUSTER_ZERO_ALLOC) { status |= BDRV_BLOCK_ZERO; - } else if (ret != QCOW2_CLUSTER_UNALLOCATED) { + } else if (type != QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN && + type != QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC) { status |= BDRV_BLOCK_DATA; } if (s->metadata_preallocation && (status & BDRV_BLOCK_DATA) && @@ -2087,6 +2102,7 @@ static coroutine_fn int qcow2_handle_l2meta(BlockDriverState *bs, QCowL2Meta *next; if (link_l2) { + assert(!l2meta->prealloc); ret = qcow2_alloc_cluster_link_l2(bs, l2meta); if (ret) { goto out; @@ -2113,7 +2129,7 @@ out: static coroutine_fn int qcow2_co_preadv_encrypted(BlockDriverState *bs, - uint64_t file_cluster_offset, + uint64_t host_offset, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, @@ -2140,16 +2156,12 @@ qcow2_co_preadv_encrypted(BlockDriverState *bs, } BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); - ret = bdrv_co_pread(s->data_file, - file_cluster_offset + offset_into_cluster(s, offset), - bytes, buf, 0); + ret = bdrv_co_pread(s->data_file, host_offset, bytes, buf, 0); if (ret < 0) { goto fail; } - if (qcow2_co_decrypt(bs, - file_cluster_offset + offset_into_cluster(s, offset), - offset, buf, bytes) < 0) + if (qcow2_co_decrypt(bs, host_offset, offset, buf, bytes) < 0) { ret = -EIO; goto fail; @@ -2166,8 +2178,8 @@ typedef struct Qcow2AioTask { AioTask task; BlockDriverState *bs; - QCow2ClusterType cluster_type; /* only for read */ - uint64_t file_cluster_offset; + QCow2SubclusterType subcluster_type; /* only for read */ + uint64_t 
host_offset; /* or full descriptor in compressed clusters */ uint64_t offset; uint64_t bytes; QEMUIOVector *qiov; @@ -2179,8 +2191,8 @@ static coroutine_fn int qcow2_co_preadv_task_entry(AioTask *task); static coroutine_fn int qcow2_add_task(BlockDriverState *bs, AioTaskPool *pool, AioTaskFunc func, - QCow2ClusterType cluster_type, - uint64_t file_cluster_offset, + QCow2SubclusterType subcluster_type, + uint64_t host_offset, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, @@ -2193,9 +2205,9 @@ static coroutine_fn int qcow2_add_task(BlockDriverState *bs, *task = (Qcow2AioTask) { .task.func = func, .bs = bs, - .cluster_type = cluster_type, + .subcluster_type = subcluster_type, .qiov = qiov, - .file_cluster_offset = file_cluster_offset, + .host_offset = host_offset, .offset = offset, .bytes = bytes, .qiov_offset = qiov_offset, @@ -2204,7 +2216,7 @@ static coroutine_fn int qcow2_add_task(BlockDriverState *bs, trace_qcow2_add_task(qemu_coroutine_self(), bs, pool, func == qcow2_co_preadv_task_entry ? 
"read" : "write", - cluster_type, file_cluster_offset, offset, bytes, + subcluster_type, host_offset, offset, bytes, qiov, qiov_offset); if (!pool) { @@ -2217,42 +2229,40 @@ static coroutine_fn int qcow2_add_task(BlockDriverState *bs, } static coroutine_fn int qcow2_co_preadv_task(BlockDriverState *bs, - QCow2ClusterType cluster_type, - uint64_t file_cluster_offset, + QCow2SubclusterType subc_type, + uint64_t host_offset, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, size_t qiov_offset) { BDRVQcow2State *s = bs->opaque; - int offset_in_cluster = offset_into_cluster(s, offset); - switch (cluster_type) { - case QCOW2_CLUSTER_ZERO_PLAIN: - case QCOW2_CLUSTER_ZERO_ALLOC: + switch (subc_type) { + case QCOW2_SUBCLUSTER_ZERO_PLAIN: + case QCOW2_SUBCLUSTER_ZERO_ALLOC: /* Both zero types are handled in qcow2_co_preadv_part */ g_assert_not_reached(); - case QCOW2_CLUSTER_UNALLOCATED: + case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN: + case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC: assert(bs->backing); /* otherwise handled in qcow2_co_preadv_part */ BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); return bdrv_co_preadv_part(bs->backing, offset, bytes, qiov, qiov_offset, 0); - case QCOW2_CLUSTER_COMPRESSED: - return qcow2_co_preadv_compressed(bs, file_cluster_offset, + case QCOW2_SUBCLUSTER_COMPRESSED: + return qcow2_co_preadv_compressed(bs, host_offset, offset, bytes, qiov, qiov_offset); - case QCOW2_CLUSTER_NORMAL: - assert(offset_into_cluster(s, file_cluster_offset) == 0); + case QCOW2_SUBCLUSTER_NORMAL: if (bs->encrypted) { - return qcow2_co_preadv_encrypted(bs, file_cluster_offset, + return qcow2_co_preadv_encrypted(bs, host_offset, offset, bytes, qiov, qiov_offset); } BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); - return bdrv_co_preadv_part(s->data_file, - file_cluster_offset + offset_in_cluster, + return bdrv_co_preadv_part(s->data_file, host_offset, bytes, qiov, qiov_offset, 0); default: @@ -2268,8 +2278,9 @@ static coroutine_fn int qcow2_co_preadv_task_entry(AioTask *task) 
assert(!t->l2meta); - return qcow2_co_preadv_task(t->bs, t->cluster_type, t->file_cluster_offset, - t->offset, t->bytes, t->qiov, t->qiov_offset); + return qcow2_co_preadv_task(t->bs, t->subcluster_type, + t->host_offset, t->offset, t->bytes, + t->qiov, t->qiov_offset); } static coroutine_fn int qcow2_co_preadv_part(BlockDriverState *bs, @@ -2280,7 +2291,8 @@ static coroutine_fn int qcow2_co_preadv_part(BlockDriverState *bs, BDRVQcow2State *s = bs->opaque; int ret = 0; unsigned int cur_bytes; /* number of bytes in current iteration */ - uint64_t cluster_offset = 0; + uint64_t host_offset = 0; + QCow2SubclusterType type; AioTaskPool *aio = NULL; while (bytes != 0 && aio_task_pool_status(aio) == 0) { @@ -2292,23 +2304,25 @@ static coroutine_fn int qcow2_co_preadv_part(BlockDriverState *bs, } qemu_co_mutex_lock(&s->lock); - ret = qcow2_get_cluster_offset(bs, offset, &cur_bytes, &cluster_offset); + ret = qcow2_get_host_offset(bs, offset, &cur_bytes, + &host_offset, &type); qemu_co_mutex_unlock(&s->lock); if (ret < 0) { goto out; } - if (ret == QCOW2_CLUSTER_ZERO_PLAIN || - ret == QCOW2_CLUSTER_ZERO_ALLOC || - (ret == QCOW2_CLUSTER_UNALLOCATED && !bs->backing)) + if (type == QCOW2_SUBCLUSTER_ZERO_PLAIN || + type == QCOW2_SUBCLUSTER_ZERO_ALLOC || + (type == QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN && !bs->backing) || + (type == QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC && !bs->backing)) { qemu_iovec_memset(qiov, qiov_offset, 0, cur_bytes); } else { if (!aio && cur_bytes != bytes) { aio = aio_task_pool_new(QCOW2_MAX_WORKERS); } - ret = qcow2_add_task(bs, aio, qcow2_co_preadv_task_entry, ret, - cluster_offset, offset, cur_bytes, + ret = qcow2_add_task(bs, aio, qcow2_co_preadv_task_entry, type, + host_offset, offset, cur_bytes, qiov, qiov_offset, NULL); if (ret < 0) { goto out; @@ -2414,6 +2428,9 @@ static int handle_alloc_space(BlockDriverState *bs, QCowL2Meta *l2meta) for (m = l2meta; m != NULL; m = m->next) { int ret; + uint64_t start_offset = m->alloc_offset + m->cow_start.offset; + 
unsigned nb_bytes = m->cow_end.offset + m->cow_end.nb_bytes - + m->cow_start.offset; if (!m->cow_start.nb_bytes && !m->cow_end.nb_bytes) { continue; @@ -2428,16 +2445,14 @@ static int handle_alloc_space(BlockDriverState *bs, QCowL2Meta *l2meta) * efficiently zero out the whole clusters */ - ret = qcow2_pre_write_overlap_check(bs, 0, m->alloc_offset, - m->nb_clusters * s->cluster_size, + ret = qcow2_pre_write_overlap_check(bs, 0, start_offset, nb_bytes, true); if (ret < 0) { return ret; } BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_SPACE); - ret = bdrv_co_pwrite_zeroes(s->data_file, m->alloc_offset, - m->nb_clusters * s->cluster_size, + ret = bdrv_co_pwrite_zeroes(s->data_file, start_offset, nb_bytes, BDRV_REQ_NO_FALLBACK); if (ret < 0) { if (ret != -ENOTSUP && ret != -EAGAIN) { @@ -2459,7 +2474,7 @@ static int handle_alloc_space(BlockDriverState *bs, QCowL2Meta *l2meta) * not use it somehow after qcow2_co_pwritev_task() call */ static coroutine_fn int qcow2_co_pwritev_task(BlockDriverState *bs, - uint64_t file_cluster_offset, + uint64_t host_offset, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, uint64_t qiov_offset, @@ -2468,7 +2483,6 @@ static coroutine_fn int qcow2_co_pwritev_task(BlockDriverState *bs, int ret; BDRVQcow2State *s = bs->opaque; void *crypt_buf = NULL; - int offset_in_cluster = offset_into_cluster(s, offset); QEMUIOVector encrypted_qiov; if (bs->encrypted) { @@ -2481,9 +2495,7 @@ static coroutine_fn int qcow2_co_pwritev_task(BlockDriverState *bs, } qemu_iovec_to_buf(qiov, qiov_offset, crypt_buf, bytes); - if (qcow2_co_encrypt(bs, file_cluster_offset + offset_in_cluster, - offset, crypt_buf, bytes) < 0) - { + if (qcow2_co_encrypt(bs, host_offset, offset, crypt_buf, bytes) < 0) { ret = -EIO; goto out_unlocked; } @@ -2507,10 +2519,8 @@ static coroutine_fn int qcow2_co_pwritev_task(BlockDriverState *bs, */ if (!merge_cow(offset, bytes, qiov, qiov_offset, l2meta)) { BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); - 
trace_qcow2_writev_data(qemu_coroutine_self(), - file_cluster_offset + offset_in_cluster); - ret = bdrv_co_pwritev_part(s->data_file, - file_cluster_offset + offset_in_cluster, + trace_qcow2_writev_data(qemu_coroutine_self(), host_offset); + ret = bdrv_co_pwritev_part(s->data_file, host_offset, bytes, qiov, qiov_offset, 0); if (ret < 0) { goto out_unlocked; @@ -2538,9 +2548,9 @@ static coroutine_fn int qcow2_co_pwritev_task_entry(AioTask *task) { Qcow2AioTask *t = container_of(task, Qcow2AioTask, task); - assert(!t->cluster_type); + assert(!t->subcluster_type); - return qcow2_co_pwritev_task(t->bs, t->file_cluster_offset, + return qcow2_co_pwritev_task(t->bs, t->host_offset, t->offset, t->bytes, t->qiov, t->qiov_offset, t->l2meta); } @@ -2595,8 +2605,8 @@ static coroutine_fn int qcow2_co_pwritev_part( aio = aio_task_pool_new(QCOW2_MAX_WORKERS); } ret = qcow2_add_task(bs, aio, qcow2_co_pwritev_task_entry, 0, - cluster_offset, offset, cur_bytes, - qiov, qiov_offset, l2meta); + cluster_offset + offset_in_cluster, offset, + cur_bytes, qiov, qiov_offset, l2meta); l2meta = NULL; /* l2meta is consumed by qcow2_co_pwritev_task() */ if (ret < 0) { goto fail_nometa; @@ -2931,6 +2941,11 @@ int qcow2_update_header(BlockDriverState *bs) .name = "compression type", }, { + .type = QCOW2_FEAT_TYPE_INCOMPATIBLE, + .bit = QCOW2_INCOMPAT_EXTL2_BITNR, + .name = "extended L2 entries", + }, + { .type = QCOW2_FEAT_TYPE_COMPATIBLE, .bit = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR, .name = "lazy refcounts", @@ -3127,6 +3142,7 @@ static int coroutine_fn preallocate_co(BlockDriverState *bs, uint64_t offset, while (meta) { QCowL2Meta *next = meta->next; + meta->prealloc = true; ret = qcow2_alloc_cluster_link_l2(bs, meta); if (ret < 0) { @@ -3229,28 +3245,31 @@ int64_t qcow2_refcount_metadata_size(int64_t clusters, size_t cluster_size, * @total_size: virtual disk size in bytes * @cluster_size: cluster size in bytes * @refcount_order: refcount bits power-of-2 exponent + * @extended_l2: true if the 
image has extended L2 entries * * Returns: Total number of bytes required for the fully allocated image * (including metadata). */ static int64_t qcow2_calc_prealloc_size(int64_t total_size, size_t cluster_size, - int refcount_order) + int refcount_order, + bool extended_l2) { int64_t meta_size = 0; uint64_t nl1e, nl2e; int64_t aligned_total_size = ROUND_UP(total_size, cluster_size); + size_t l2e_size = extended_l2 ? L2E_SIZE_EXTENDED : L2E_SIZE_NORMAL; /* header: 1 cluster */ meta_size += cluster_size; /* total size of L2 tables */ nl2e = aligned_total_size / cluster_size; - nl2e = ROUND_UP(nl2e, cluster_size / sizeof(uint64_t)); - meta_size += nl2e * sizeof(uint64_t); + nl2e = ROUND_UP(nl2e, cluster_size / l2e_size); + meta_size += nl2e * l2e_size; /* total size of L1 tables */ - nl1e = nl2e * sizeof(uint64_t) / cluster_size; + nl1e = nl2e * l2e_size / cluster_size; nl1e = ROUND_UP(nl1e, cluster_size / sizeof(uint64_t)); meta_size += nl1e * sizeof(uint64_t); @@ -3262,7 +3281,8 @@ static int64_t qcow2_calc_prealloc_size(int64_t total_size, return meta_size + aligned_total_size; } -static bool validate_cluster_size(size_t cluster_size, Error **errp) +static bool validate_cluster_size(size_t cluster_size, bool extended_l2, + Error **errp) { int cluster_bits = ctz32(cluster_size); if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS || @@ -3272,16 +3292,28 @@ static bool validate_cluster_size(size_t cluster_size, Error **errp) "%dk", 1 << MIN_CLUSTER_BITS, 1 << (MAX_CLUSTER_BITS - 10)); return false; } + + if (extended_l2) { + unsigned min_cluster_size = + (1 << MIN_CLUSTER_BITS) * QCOW_EXTL2_SUBCLUSTERS_PER_CLUSTER; + if (cluster_size < min_cluster_size) { + error_setg(errp, "Extended L2 entries are only supported with " + "cluster sizes of at least %u bytes", min_cluster_size); + return false; + } + } + return true; } -static size_t qcow2_opt_get_cluster_size_del(QemuOpts *opts, Error **errp) +static size_t qcow2_opt_get_cluster_size_del(QemuOpts 
*opts, bool extended_l2, + Error **errp) { size_t cluster_size; cluster_size = qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE, DEFAULT_CLUSTER_SIZE); - if (!validate_cluster_size(cluster_size, errp)) { + if (!validate_cluster_size(cluster_size, extended_l2, errp)) { return 0; } return cluster_size; @@ -3395,7 +3427,20 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) cluster_size = DEFAULT_CLUSTER_SIZE; } - if (!validate_cluster_size(cluster_size, errp)) { + if (!qcow2_opts->has_extended_l2) { + qcow2_opts->extended_l2 = false; + } + if (qcow2_opts->extended_l2) { + if (version < 3) { + error_setg(errp, "Extended L2 entries are only supported with " + "compatibility level 1.1 and above (use version=v3 or " + "greater)"); + ret = -EINVAL; + goto out; + } + } + + if (!validate_cluster_size(cluster_size, qcow2_opts->extended_l2, errp)) { ret = -EINVAL; goto out; } @@ -3404,10 +3449,11 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) qcow2_opts->preallocation = PREALLOC_MODE_OFF; } if (qcow2_opts->has_backing_file && - qcow2_opts->preallocation != PREALLOC_MODE_OFF) + qcow2_opts->preallocation != PREALLOC_MODE_OFF && + !qcow2_opts->extended_l2) { - error_setg(errp, "Backing file and preallocation cannot be used at " - "the same time"); + error_setg(errp, "Backing file and preallocation can only be used at " + "the same time if extended_l2 is on"); ret = -EINVAL; goto out; } @@ -3546,6 +3592,11 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) cpu_to_be64(QCOW2_INCOMPAT_COMPRESSION); } + if (qcow2_opts->extended_l2) { + header->incompatible_features |= + cpu_to_be64(QCOW2_INCOMPAT_EXTL2); + } + ret = blk_pwrite(blk, 0, header, cluster_size, 0); g_free(header); if (ret < 0) { @@ -3723,6 +3774,7 @@ static int coroutine_fn qcow2_co_create_opts(BlockDriver *drv, { BLOCK_OPT_BACKING_FMT, "backing-fmt" }, { BLOCK_OPT_CLUSTER_SIZE, "cluster-size" }, { BLOCK_OPT_LAZY_REFCOUNTS, "lazy-refcounts" }, + { 
BLOCK_OPT_EXTL2, "extended-l2" }, { BLOCK_OPT_REFCOUNT_BITS, "refcount-bits" }, { BLOCK_OPT_ENCRYPT, BLOCK_OPT_ENCRYPT_FORMAT }, { BLOCK_OPT_COMPAT_LEVEL, "version" }, @@ -3830,8 +3882,9 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs, int ret; BDRVQcow2State *s = bs->opaque; - uint32_t head = offset % s->cluster_size; - uint32_t tail = (offset + bytes) % s->cluster_size; + uint32_t head = offset_into_subcluster(s, offset); + uint32_t tail = ROUND_UP(offset + bytes, s->subcluster_size) - + (offset + bytes); trace_qcow2_pwrite_zeroes_start_req(qemu_coroutine_self(), offset, bytes); if (offset + bytes == bs->total_sectors * BDRV_SECTOR_SIZE) { @@ -3841,25 +3894,27 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs, if (head || tail) { uint64_t off; unsigned int nr; + QCow2SubclusterType type; - assert(head + bytes <= s->cluster_size); + assert(head + bytes + tail <= s->subcluster_size); /* check whether remainder of cluster already reads as zero */ if (!(is_zero(bs, offset - head, head) && - is_zero(bs, offset + bytes, - tail ? 
s->cluster_size - tail : 0))) { + is_zero(bs, offset + bytes, tail))) { return -ENOTSUP; } qemu_co_mutex_lock(&s->lock); /* We can have new write after previous check */ - offset = QEMU_ALIGN_DOWN(offset, s->cluster_size); - bytes = s->cluster_size; - nr = s->cluster_size; - ret = qcow2_get_cluster_offset(bs, offset, &nr, &off); - if (ret != QCOW2_CLUSTER_UNALLOCATED && - ret != QCOW2_CLUSTER_ZERO_PLAIN && - ret != QCOW2_CLUSTER_ZERO_ALLOC) { + offset -= head; + bytes = s->subcluster_size; + nr = s->subcluster_size; + ret = qcow2_get_host_offset(bs, offset, &nr, &off, &type); + if (ret < 0 || + (type != QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN && + type != QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC && + type != QCOW2_SUBCLUSTER_ZERO_PLAIN && + type != QCOW2_SUBCLUSTER_ZERO_ALLOC)) { qemu_co_mutex_unlock(&s->lock); return -ENOTSUP; } @@ -3869,8 +3924,8 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs, trace_qcow2_pwrite_zeroes(qemu_coroutine_self(), offset, bytes); - /* Whatever is left can use real zero clusters */ - ret = qcow2_cluster_zeroize(bs, offset, bytes, flags); + /* Whatever is left can use real zero subclusters */ + ret = qcow2_subcluster_zeroize(bs, offset, bytes, flags); qemu_co_mutex_unlock(&s->lock); return ret; @@ -3923,17 +3978,20 @@ qcow2_co_copy_range_from(BlockDriverState *bs, while (bytes != 0) { uint64_t copy_offset = 0; + QCow2SubclusterType type; /* prepare next request */ cur_bytes = MIN(bytes, INT_MAX); cur_write_flags = write_flags; - ret = qcow2_get_cluster_offset(bs, src_offset, &cur_bytes, ©_offset); + ret = qcow2_get_host_offset(bs, src_offset, &cur_bytes, + ©_offset, &type); if (ret < 0) { goto out; } - switch (ret) { - case QCOW2_CLUSTER_UNALLOCATED: + switch (type) { + case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN: + case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC: if (bs->backing && bs->backing->bs) { int64_t backing_length = bdrv_getlength(bs->backing->bs); if (src_offset >= backing_length) { @@ -3948,18 +4006,17 @@ 
qcow2_co_copy_range_from(BlockDriverState *bs, } break; - case QCOW2_CLUSTER_ZERO_PLAIN: - case QCOW2_CLUSTER_ZERO_ALLOC: + case QCOW2_SUBCLUSTER_ZERO_PLAIN: + case QCOW2_SUBCLUSTER_ZERO_ALLOC: cur_write_flags |= BDRV_REQ_ZERO_WRITE; break; - case QCOW2_CLUSTER_COMPRESSED: + case QCOW2_SUBCLUSTER_COMPRESSED: ret = -ENOTSUP; goto out; - case QCOW2_CLUSTER_NORMAL: + case QCOW2_SUBCLUSTER_NORMAL: child = s->data_file; - copy_offset += offset_into_cluster(s, src_offset); break; default: @@ -4206,6 +4263,7 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, int64_t clusters_allocated; int64_t old_file_size, last_cluster, new_file_size; uint64_t nb_new_data_clusters, nb_new_l2_tables; + bool subclusters_need_allocation = false; /* With a data file, preallocation means just allocating the metadata * and forwarding the truncate request to the data file */ @@ -4246,7 +4304,7 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, * preallocation. All that matters is that we will not have to allocate * new refcount structures for them.) 
*/ nb_new_l2_tables = DIV_ROUND_UP(nb_new_data_clusters, - s->cluster_size / sizeof(uint64_t)); + s->cluster_size / l2_entry_size(s)); /* The cluster range may not be aligned to L2 boundaries, so add one L2 * table for a potential head/tail */ nb_new_l2_tables++; @@ -4287,6 +4345,8 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, BDRV_REQ_ZERO_WRITE, NULL); if (ret >= 0) { flags &= ~BDRV_REQ_ZERO_WRITE; + /* Ensure that we read zeroes and not backing file data */ + subclusters_need_allocation = true; } } else { ret = -1; @@ -4325,6 +4385,7 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, .offset = nb_clusters << s->cluster_bits, .nb_bytes = 0, }, + .prealloc = !subclusters_need_allocation, }; qemu_co_queue_init(&allocation.dependent_requests); @@ -4349,15 +4410,16 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, } if ((flags & BDRV_REQ_ZERO_WRITE) && offset > old_length) { - uint64_t zero_start = QEMU_ALIGN_UP(old_length, s->cluster_size); + uint64_t zero_start = QEMU_ALIGN_UP(old_length, s->subcluster_size); /* - * Use zero clusters as much as we can. qcow2_cluster_zeroize() - * requires a cluster-aligned start. The end may be unaligned if it is - * at the end of the image (which it is here). + * Use zero clusters as much as we can. qcow2_subcluster_zeroize() + * requires a subcluster-aligned start. The end may be unaligned if + * it is at the end of the image (which it is here). 
*/ if (offset > zero_start) { - ret = qcow2_cluster_zeroize(bs, zero_start, offset - zero_start, 0); + ret = qcow2_subcluster_zeroize(bs, zero_start, offset - zero_start, + 0); if (ret < 0) { error_setg_errno(errp, -ret, "Failed to zero out new clusters"); goto fail; @@ -4487,7 +4549,7 @@ static coroutine_fn int qcow2_co_pwritev_compressed_task_entry(AioTask *task) { Qcow2AioTask *t = container_of(task, Qcow2AioTask, task); - assert(!t->cluster_type && !t->l2meta); + assert(!t->subcluster_type && !t->l2meta); return qcow2_co_pwritev_compressed_task(t->bs, t->offset, t->bytes, t->qiov, t->qiov_offset); @@ -4562,7 +4624,7 @@ qcow2_co_pwritev_compressed_part(BlockDriverState *bs, static int coroutine_fn qcow2_co_preadv_compressed(BlockDriverState *bs, - uint64_t file_cluster_offset, + uint64_t cluster_descriptor, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, @@ -4574,8 +4636,8 @@ qcow2_co_preadv_compressed(BlockDriverState *bs, uint8_t *buf, *out_buf; int offset_in_cluster = offset_into_cluster(s, offset); - coffset = file_cluster_offset & s->cluster_offset_mask; - nb_csectors = ((file_cluster_offset >> s->csize_shift) & s->csize_mask) + 1; + coffset = cluster_descriptor & s->cluster_offset_mask; + nb_csectors = ((cluster_descriptor >> s->csize_shift) & s->csize_mask) + 1; csize = nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE - (coffset & ~QCOW2_COMPRESSED_SECTOR_MASK); @@ -4829,9 +4891,14 @@ static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs, PreallocMode prealloc; bool has_backing_file; bool has_luks; + bool extended_l2; + size_t l2e_size; /* Parse image creation options */ - cluster_size = qcow2_opt_get_cluster_size_del(opts, &local_err); + extended_l2 = qemu_opt_get_bool_del(opts, BLOCK_OPT_EXTL2, false); + + cluster_size = qcow2_opt_get_cluster_size_del(opts, extended_l2, + &local_err); if (local_err) { goto err; } @@ -4887,8 +4954,9 @@ static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs, virtual_size = 
ROUND_UP(virtual_size, cluster_size); /* Check that virtual disk size is valid */ + l2e_size = extended_l2 ? L2E_SIZE_EXTENDED : L2E_SIZE_NORMAL; l2_tables = DIV_ROUND_UP(virtual_size / cluster_size, - cluster_size / sizeof(uint64_t)); + cluster_size / l2e_size); if (l2_tables * sizeof(uint64_t) > QCOW_MAX_L1_SIZE) { error_setg(&local_err, "The image size is too large " "(try using a larger cluster size)"); @@ -4951,9 +5019,9 @@ static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs, } info = g_new0(BlockMeasureInfo, 1); - info->fully_allocated = + info->fully_allocated = luks_payload_size + qcow2_calc_prealloc_size(virtual_size, cluster_size, - ctz32(refcount_bits)) + luks_payload_size; + ctz32(refcount_bits), extended_l2); /* * Remove data clusters that are not required. This overestimates the @@ -5026,6 +5094,8 @@ static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs, .corrupt = s->incompatible_features & QCOW2_INCOMPAT_CORRUPT, .has_corrupt = true, + .has_extended_l2 = true, + .extended_l2 = has_subclusters(s), .refcount_bits = s->refcount_bits, .has_bitmaps = !!bitmaps, .bitmaps = bitmaps, @@ -5754,6 +5824,12 @@ static QemuOptsList qcow2_create_opts = { .def_value_str = stringify(DEFAULT_CLUSTER_SIZE) \ }, \ { \ + .name = BLOCK_OPT_EXTL2, \ + .type = QEMU_OPT_BOOL, \ + .help = "Extended L2 tables", \ + .def_value_str = "off" \ + }, \ + { \ .name = BLOCK_OPT_PREALLOC, \ .type = QEMU_OPT_STRING, \ .help = "Preallocation mode (allowed values: off, " \ diff --git a/block/qcow2.h b/block/qcow2.h index 7ce2c23bdb..065ec3df0b 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -78,6 +78,27 @@ /* The cluster reads as all zeros */ #define QCOW_OFLAG_ZERO (1ULL << 0) +#define QCOW_EXTL2_SUBCLUSTERS_PER_CLUSTER 32 + +/* The subcluster X [0..31] is allocated */ +#define QCOW_OFLAG_SUB_ALLOC(X) (1ULL << (X)) +/* The subcluster X [0..31] reads as zeroes */ +#define QCOW_OFLAG_SUB_ZERO(X) (QCOW_OFLAG_SUB_ALLOC(X) << 32) +/* Subclusters 
[X, Y) (0 <= X <= Y <= 32) are allocated */ +#define QCOW_OFLAG_SUB_ALLOC_RANGE(X, Y) \ + (QCOW_OFLAG_SUB_ALLOC(Y) - QCOW_OFLAG_SUB_ALLOC(X)) +/* Subclusters [X, Y) (0 <= X <= Y <= 32) read as zeroes */ +#define QCOW_OFLAG_SUB_ZERO_RANGE(X, Y) \ + (QCOW_OFLAG_SUB_ALLOC_RANGE(X, Y) << 32) +/* L2 entry bitmap with all allocation bits set */ +#define QCOW_L2_BITMAP_ALL_ALLOC (QCOW_OFLAG_SUB_ALLOC_RANGE(0, 32)) +/* L2 entry bitmap with all "read as zeroes" bits set */ +#define QCOW_L2_BITMAP_ALL_ZEROES (QCOW_OFLAG_SUB_ZERO_RANGE(0, 32)) + +/* Size of normal and extended L2 entries */ +#define L2E_SIZE_NORMAL (sizeof(uint64_t)) +#define L2E_SIZE_EXTENDED (sizeof(uint64_t) * 2) + #define MIN_CLUSTER_BITS 9 #define MAX_CLUSTER_BITS 21 @@ -225,15 +246,18 @@ enum { QCOW2_INCOMPAT_CORRUPT_BITNR = 1, QCOW2_INCOMPAT_DATA_FILE_BITNR = 2, QCOW2_INCOMPAT_COMPRESSION_BITNR = 3, + QCOW2_INCOMPAT_EXTL2_BITNR = 4, QCOW2_INCOMPAT_DIRTY = 1 << QCOW2_INCOMPAT_DIRTY_BITNR, QCOW2_INCOMPAT_CORRUPT = 1 << QCOW2_INCOMPAT_CORRUPT_BITNR, QCOW2_INCOMPAT_DATA_FILE = 1 << QCOW2_INCOMPAT_DATA_FILE_BITNR, QCOW2_INCOMPAT_COMPRESSION = 1 << QCOW2_INCOMPAT_COMPRESSION_BITNR, + QCOW2_INCOMPAT_EXTL2 = 1 << QCOW2_INCOMPAT_EXTL2_BITNR, QCOW2_INCOMPAT_MASK = QCOW2_INCOMPAT_DIRTY | QCOW2_INCOMPAT_CORRUPT | QCOW2_INCOMPAT_DATA_FILE - | QCOW2_INCOMPAT_COMPRESSION, + | QCOW2_INCOMPAT_COMPRESSION + | QCOW2_INCOMPAT_EXTL2, }; /* Compatible feature bits */ @@ -295,6 +319,9 @@ typedef struct BDRVQcow2State { int cluster_bits; int cluster_size; int l2_slice_size; + int subcluster_bits; + int subcluster_size; + int subclusters_per_cluster; int l2_bits; int l2_size; int l1_size; @@ -440,6 +467,14 @@ typedef struct QCowL2Meta bool skip_cow; /** + * Indicates that this is not a normal write request but a preallocation. + * If the image has extended L2 entries this means that no new individual + * subclusters will be marked as allocated in the L2 bitmap (but any + * existing contents of that bitmap will be kept). 
+ */ + bool prealloc; + + /** * The I/O vector with the data from the actual guest write request. * If non-NULL, this is meant to be merged together with the data * from @cow_start and @cow_end into one single write operation. @@ -453,6 +488,33 @@ typedef struct QCowL2Meta QLIST_ENTRY(QCowL2Meta) next_in_flight; } QCowL2Meta; +/* + * In images with standard L2 entries all clusters are treated as if + * they had one subcluster so QCow2ClusterType and QCow2SubclusterType + * can be mapped to each other and have the exact same meaning + * (QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC cannot happen in these images). + * + * In images with extended L2 entries QCow2ClusterType refers to the + * complete cluster and QCow2SubclusterType to each of the individual + * subclusters, so there are several possible combinations: + * + * |--------------+---------------------------| + * | Cluster type | Possible subcluster types | + * |--------------+---------------------------| + * | UNALLOCATED | UNALLOCATED_PLAIN | + * | | ZERO_PLAIN | + * |--------------+---------------------------| + * | NORMAL | UNALLOCATED_ALLOC | + * | | ZERO_ALLOC | + * | | NORMAL | + * |--------------+---------------------------| + * | COMPRESSED | COMPRESSED | + * |--------------+---------------------------| + * + * QCOW2_SUBCLUSTER_INVALID means that the L2 entry is incorrect and + * the image should be marked corrupt. 
+ */ + typedef enum QCow2ClusterType { QCOW2_CLUSTER_UNALLOCATED, QCOW2_CLUSTER_ZERO_PLAIN, @@ -461,6 +523,16 @@ typedef enum QCow2ClusterType { QCOW2_CLUSTER_COMPRESSED, } QCow2ClusterType; +typedef enum QCow2SubclusterType { + QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN, + QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC, + QCOW2_SUBCLUSTER_ZERO_PLAIN, + QCOW2_SUBCLUSTER_ZERO_ALLOC, + QCOW2_SUBCLUSTER_NORMAL, + QCOW2_SUBCLUSTER_COMPRESSED, + QCOW2_SUBCLUSTER_INVALID, +} QCow2SubclusterType; + typedef enum QCow2MetadataOverlap { QCOW2_OL_MAIN_HEADER_BITNR = 0, QCOW2_OL_ACTIVE_L1_BITNR = 1, @@ -510,6 +582,49 @@ typedef enum QCow2MetadataOverlap { #define INV_OFFSET (-1ULL) +static inline bool has_subclusters(BDRVQcow2State *s) +{ + return s->incompatible_features & QCOW2_INCOMPAT_EXTL2; +} + +static inline size_t l2_entry_size(BDRVQcow2State *s) +{ + return has_subclusters(s) ? L2E_SIZE_EXTENDED : L2E_SIZE_NORMAL; +} + +static inline uint64_t get_l2_entry(BDRVQcow2State *s, uint64_t *l2_slice, + int idx) +{ + idx *= l2_entry_size(s) / sizeof(uint64_t); + return be64_to_cpu(l2_slice[idx]); +} + +static inline uint64_t get_l2_bitmap(BDRVQcow2State *s, uint64_t *l2_slice, + int idx) +{ + if (has_subclusters(s)) { + idx *= l2_entry_size(s) / sizeof(uint64_t); + return be64_to_cpu(l2_slice[idx + 1]); + } else { + return 0; /* For convenience only; this value has no meaning. 
*/ + } +} + +static inline void set_l2_entry(BDRVQcow2State *s, uint64_t *l2_slice, + int idx, uint64_t entry) +{ + idx *= l2_entry_size(s) / sizeof(uint64_t); + l2_slice[idx] = cpu_to_be64(entry); +} + +static inline void set_l2_bitmap(BDRVQcow2State *s, uint64_t *l2_slice, + int idx, uint64_t bitmap) +{ + assert(has_subclusters(s)); + idx *= l2_entry_size(s) / sizeof(uint64_t); + l2_slice[idx + 1] = cpu_to_be64(bitmap); +} + static inline bool has_data_file(BlockDriverState *bs) { BDRVQcow2State *s = bs->opaque; @@ -532,11 +647,21 @@ static inline int64_t offset_into_cluster(BDRVQcow2State *s, int64_t offset) return offset & (s->cluster_size - 1); } +static inline int64_t offset_into_subcluster(BDRVQcow2State *s, int64_t offset) +{ + return offset & (s->subcluster_size - 1); +} + static inline uint64_t size_to_clusters(BDRVQcow2State *s, uint64_t size) { return (size + (s->cluster_size - 1)) >> s->cluster_bits; } +static inline uint64_t size_to_subclusters(BDRVQcow2State *s, uint64_t size) +{ + return (size + (s->subcluster_size - 1)) >> s->subcluster_bits; +} + static inline int64_t size_to_l1(BDRVQcow2State *s, int64_t size) { int shift = s->cluster_bits + s->l2_bits; @@ -558,6 +683,11 @@ static inline int offset_to_l2_slice_index(BDRVQcow2State *s, int64_t offset) return (offset >> s->cluster_bits) & (s->l2_slice_size - 1); } +static inline int offset_to_sc_index(BDRVQcow2State *s, int64_t offset) +{ + return (offset >> s->subcluster_bits) & (s->subclusters_per_cluster - 1); +} + static inline int64_t qcow2_vm_state_offset(BDRVQcow2State *s) { return (int64_t)s->l1_vm_state_index << (s->cluster_bits + s->l2_bits); @@ -566,9 +696,11 @@ static inline int64_t qcow2_vm_state_offset(BDRVQcow2State *s) static inline QCow2ClusterType qcow2_get_cluster_type(BlockDriverState *bs, uint64_t l2_entry) { + BDRVQcow2State *s = bs->opaque; + if (l2_entry & QCOW_OFLAG_COMPRESSED) { return QCOW2_CLUSTER_COMPRESSED; - } else if (l2_entry & QCOW_OFLAG_ZERO) { + } else if 
((l2_entry & QCOW_OFLAG_ZERO) && !has_subclusters(s)) { if (l2_entry & L2E_OFFSET_MASK) { return QCOW2_CLUSTER_ZERO_ALLOC; } @@ -588,6 +720,72 @@ static inline QCow2ClusterType qcow2_get_cluster_type(BlockDriverState *bs, } } +/* + * In an image without subsclusters @l2_bitmap is ignored and + * @sc_index must be 0. + * Return QCOW2_SUBCLUSTER_INVALID if an invalid l2 entry is detected + * (this checks the whole entry and bitmap, not only the bits related + * to subcluster @sc_index). + */ +static inline +QCow2SubclusterType qcow2_get_subcluster_type(BlockDriverState *bs, + uint64_t l2_entry, + uint64_t l2_bitmap, + unsigned sc_index) +{ + BDRVQcow2State *s = bs->opaque; + QCow2ClusterType type = qcow2_get_cluster_type(bs, l2_entry); + assert(sc_index < s->subclusters_per_cluster); + + if (has_subclusters(s)) { + switch (type) { + case QCOW2_CLUSTER_COMPRESSED: + return QCOW2_SUBCLUSTER_COMPRESSED; + case QCOW2_CLUSTER_NORMAL: + if ((l2_bitmap >> 32) & l2_bitmap) { + return QCOW2_SUBCLUSTER_INVALID; + } else if (l2_bitmap & QCOW_OFLAG_SUB_ZERO(sc_index)) { + return QCOW2_SUBCLUSTER_ZERO_ALLOC; + } else if (l2_bitmap & QCOW_OFLAG_SUB_ALLOC(sc_index)) { + return QCOW2_SUBCLUSTER_NORMAL; + } else { + return QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC; + } + case QCOW2_CLUSTER_UNALLOCATED: + if (l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC) { + return QCOW2_SUBCLUSTER_INVALID; + } else if (l2_bitmap & QCOW_OFLAG_SUB_ZERO(sc_index)) { + return QCOW2_SUBCLUSTER_ZERO_PLAIN; + } else { + return QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN; + } + default: + g_assert_not_reached(); + } + } else { + switch (type) { + case QCOW2_CLUSTER_COMPRESSED: + return QCOW2_SUBCLUSTER_COMPRESSED; + case QCOW2_CLUSTER_ZERO_PLAIN: + return QCOW2_SUBCLUSTER_ZERO_PLAIN; + case QCOW2_CLUSTER_ZERO_ALLOC: + return QCOW2_SUBCLUSTER_ZERO_ALLOC; + case QCOW2_CLUSTER_NORMAL: + return QCOW2_SUBCLUSTER_NORMAL; + case QCOW2_CLUSTER_UNALLOCATED: + return QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN; + default: + g_assert_not_reached(); + 
} + } +} + +static inline bool qcow2_cluster_is_allocated(QCow2ClusterType type) +{ + return (type == QCOW2_CLUSTER_COMPRESSED || type == QCOW2_CLUSTER_NORMAL || + type == QCOW2_CLUSTER_ZERO_ALLOC); +} + /* Check whether refcounts are eager or lazy */ static inline bool qcow2_need_accurate_refcounts(BDRVQcow2State *s) { @@ -694,8 +892,9 @@ int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index); int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num, uint8_t *buf, int nb_sectors, bool enc, Error **errp); -int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, - unsigned int *bytes, uint64_t *cluster_offset); +int qcow2_get_host_offset(BlockDriverState *bs, uint64_t offset, + unsigned int *bytes, uint64_t *host_offset, + QCow2SubclusterType *subcluster_type); int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, unsigned int *bytes, uint64_t *host_offset, QCowL2Meta **m); @@ -709,8 +908,8 @@ void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m); int qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset, uint64_t bytes, enum qcow2_discard_type type, bool full_discard); -int qcow2_cluster_zeroize(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, int flags); +int qcow2_subcluster_zeroize(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, int flags); int qcow2_expand_zero_clusters(BlockDriverState *bs, BlockDriverAmendStatusCB *status_cb, diff --git a/block/trace-events b/block/trace-events index 9158335061..e1c79a910d 100644 --- a/block/trace-events +++ b/block/trace-events @@ -77,7 +77,7 @@ luring_io_uring_submit(void *s, int ret) "LuringState %p ret %d" luring_resubmit_short_read(void *s, void *luringcb, int nread) "LuringState %p luringcb %p nread %d" # qcow2.c -qcow2_add_task(void *co, void *bs, void *pool, const char *action, int cluster_type, uint64_t file_cluster_offset, uint64_t offset, uint64_t bytes, void *qiov, size_t qiov_offset) "co %p bs %p pool %p: %s: cluster_type %d 
file_cluster_offset %" PRIu64 " offset %" PRIu64 " bytes %" PRIu64 " qiov %p qiov_offset %zu" +qcow2_add_task(void *co, void *bs, void *pool, const char *action, int cluster_type, uint64_t host_offset, uint64_t offset, uint64_t bytes, void *qiov, size_t qiov_offset) "co %p bs %p pool %p: %s: cluster_type %d file_cluster_offset %" PRIu64 " offset %" PRIu64 " bytes %" PRIu64 " qiov %p qiov_offset %zu" qcow2_writev_start_req(void *co, int64_t offset, int bytes) "co %p offset 0x%" PRIx64 " bytes %d" qcow2_writev_done_req(void *co, int ret) "co %p ret %d" qcow2_writev_start_part(void *co) "co %p" diff --git a/docs/interop/qcow2.txt b/docs/interop/qcow2.txt index f072e27900..7da0d81df8 100644 --- a/docs/interop/qcow2.txt +++ b/docs/interop/qcow2.txt @@ -42,6 +42,9 @@ The first cluster of a qcow2 image contains the file header: as the maximum cluster size and won't be able to open images with larger cluster sizes. + Note: if the image has Extended L2 Entries then cluster_bits + must be at least 14 (i.e. 16384 byte clusters). + 24 - 31: size Virtual disk size in bytes. @@ -117,7 +120,12 @@ the next fields through header_length. clusters. The compression_type field must be present and not zero. - Bits 4-63: Reserved (set to 0) + Bit 4: Extended L2 Entries. If this bit is set then + L2 table entries use an extended format that + allows subcluster-based allocation. See the + Extended L2 Entries section for more details. + + Bits 5-63: Reserved (set to 0) 80 - 87: compatible_features Bitmask of compatible features. An implementation can @@ -498,7 +506,7 @@ cannot be relaxed without an incompatible layout change). 
Given an offset into the virtual disk, the offset into the image file can be obtained as follows: - l2_entries = (cluster_size / sizeof(uint64_t)) + l2_entries = (cluster_size / sizeof(uint64_t)) [*] l2_index = (offset / cluster_size) % l2_entries l1_index = (offset / cluster_size) / l2_entries @@ -508,6 +516,8 @@ obtained as follows: return cluster_offset + (offset % cluster_size) + [*] this changes if Extended L2 Entries are enabled, see next section + L1 table entry: Bit 0 - 8: Reserved (set to 0) @@ -548,7 +558,8 @@ Standard Cluster Descriptor: nor is data read from the backing file if the cluster is unallocated. - With version 2, this is always 0. + With version 2 or with extended L2 entries (see the next + section), this is always 0. 1 - 8: Reserved (set to 0) @@ -585,6 +596,57 @@ file (except if bit 0 in the Standard Cluster Descriptor is set). If there is no backing file or the backing file is smaller than the image, they shall read zeros for all parts that are not covered by the backing file. +== Extended L2 Entries == + +An image uses Extended L2 Entries if bit 4 is set on the incompatible_features +field of the header. + +In these images standard data clusters are divided into 32 subclusters of the +same size. They are contiguous and start from the beginning of the cluster. +Subclusters can be allocated independently and the L2 entry contains information +indicating the status of each one of them. Compressed data clusters don't have +subclusters so they are treated the same as in images without this feature. + +The size of an extended L2 entry is 128 bits so the number of entries per table +is calculated using this formula: + + l2_entries = (cluster_size / (2 * sizeof(uint64_t))) + +The first 64 bits have the same format as the standard L2 table entry described +in the previous section, with the exception of bit 0 of the standard cluster +descriptor. 
+ +The last 64 bits contain a subcluster allocation bitmap with this format: + +Subcluster Allocation Bitmap (for standard clusters): + + Bit 0 - 31: Allocation status (one bit per subcluster) + + 1: the subcluster is allocated. In this case the + host cluster offset field must contain a valid + offset. + 0: the subcluster is not allocated. In this case + read requests shall go to the backing file or + return zeros if there is no backing file data. + + Bits are assigned starting from the least significant + one (i.e. bit x is used for subcluster x). + + 32 - 63 Subcluster reads as zeros (one bit per subcluster) + + 1: the subcluster reads as zeros. In this case the + allocation status bit must be unset. The host + cluster offset field may or may not be set. + 0: no effect. + + Bits are assigned starting from the least significant + one (i.e. bit x is used for subcluster x - 32). + +Subcluster Allocation Bitmap (for compressed clusters): + + Bit 0 - 63: Reserved (set to 0) + Compressed clusters don't have subclusters, + so this field is not used. == Snapshots == diff --git a/docs/qcow2-cache.txt b/docs/qcow2-cache.txt index d57f409861..5f763aa6bb 100644 --- a/docs/qcow2-cache.txt +++ b/docs/qcow2-cache.txt @@ -1,6 +1,6 @@ qcow2 L2/refcount cache configuration ===================================== -Copyright (C) 2015, 2018 Igalia, S.L. +Copyright (C) 2015, 2018-2020 Igalia, S.L. Author: Alberto Garcia <berto@igalia.com> This work is licensed under the terms of the GNU GPL, version 2 or @@ -222,3 +222,20 @@ support this functionality, and is 0 (disabled) on other platforms. This functionality currently relies on the MADV_DONTNEED argument for madvise() to actually free the memory. This is a Linux-specific feature, so cache-clean-interval is not supported on other systems. + + +Extended L2 Entries +------------------- +All numbers shown in this document are valid for qcow2 images with normal +64-bit L2 entries. 
+ +Images with extended L2 entries need twice as much L2 metadata, so the L2 +cache size must be twice as large for the same disk space. + + disk_size = l2_cache_size * cluster_size / 16 + +i.e. + + l2_cache_size = disk_size * 16 / cluster_size + +Refcount blocks are not affected by this. diff --git a/docs/system/s390x/bootdevices.rst b/docs/system/s390x/bootdevices.rst new file mode 100644 index 0000000000..9e591cb9dc --- /dev/null +++ b/docs/system/s390x/bootdevices.rst @@ -0,0 +1,82 @@ +Boot devices on s390x +===================== + +Booting with bootindex parameter +-------------------------------- + +For classical mainframe guests (i.e. LPAR or z/VM installations), you always +have to explicitly specify the disk where you want to boot from (or "IPL" from, +in s390x-speak -- IPL means "Initial Program Load"). In particular, there can +also be only one boot device according to the architecture specification, thus +specifying multiple boot devices is not possible (yet). + +So for booting an s390x guest in QEMU, you should always mark the +device where you want to boot from with the ``bootindex`` property, for +example:: + + qemu-system-s390x -drive if=none,id=dr1,file=guest.qcow2 \ + -device virtio-blk,drive=dr1,bootindex=1 + +For booting from a CD-ROM ISO image (which needs to include El-Torito boot +information in order to be bootable), it is recommended to specify a ``scsi-cd`` +device, for example like this:: + + qemu-system-s390x -blockdev file,node-name=c1,filename=... \ + -device virtio-scsi \ + -device scsi-cd,drive=c1,bootindex=1 + +Note that you really have to use the ``bootindex`` property to select the +boot device. The old-fashioned ``-boot order=...`` command of QEMU (and +also ``-boot once=...``) is not supported on s390x. + + +Booting without bootindex parameter +----------------------------------- + +The QEMU guest firmware (the so-called s390-ccw bios) has also some rudimentary +support for scanning through the available block devices. 
So in case you did +not specify a boot device with the ``bootindex`` property, there is still a +chance that it finds a bootable device on its own and starts a guest operating +system from it. However, this scanning algorithm is still very rough and may +be incomplete, so that it might fail to detect a bootable device in many cases. +It is really recommended to always specify the boot device with the +``bootindex`` property instead. + +This also means that you should avoid the classical short-cut commands like +``-hda``, ``-cdrom`` or ``-drive if=virtio``, since it is not possible to +specify the ``bootindex`` with these commands. Note that the convenience +``-cdrom`` option does not even give you a real (virtio-scsi) CD-ROM device on +s390x. Due to technical limitations in the QEMU code base, you will get a +virtio-blk device with this parameter instead, which might not be the right +device type for installing a Linux distribution via ISO image. It is +recommended to specify a CD-ROM device via ``-device scsi-cd`` (as mentioned +above) instead. + + +Booting from a network device +----------------------------- + +Besides the normal guest firmware (which is loaded from the file ``s390-ccw.img`` +in the data directory of QEMU, or via the ``-bios`` option), QEMU ships with +a small TFTP network bootloader firmware for virtio-net-ccw devices, too. This +firmware is loaded from a file called ``s390-netboot.img`` in the QEMU data +directory. In case you want to load it from a different filename instead, +you can specify it via the ``-global s390-ipl.netboot_fw=filename`` +command line option. + +The ``bootindex`` property is especially important for booting via the network. +If you don't specify the ``bootindex`` property here, the network bootloader +firmware code won't get loaded into the guest memory so that the network boot +will fail. For a successful network boot, try something like this:: + + qemu-system-s390x -netdev user,id=n1,tftp=...,bootfile=... 
\ + -device virtio-net-ccw,netdev=n1,bootindex=1 + +The network bootloader firmware also has basic support for pxelinux.cfg-style +configuration files. See the `PXELINUX Configuration page +<https://wiki.syslinux.org/wiki/index.php?title=PXELINUX#Configuration>`__ +for details how to set up the configuration file on your TFTP server. +The supported configuration file entries are ``DEFAULT``, ``LABEL``, +``KERNEL``, ``INITRD`` and ``APPEND`` (see the `Syslinux Config file syntax +<https://wiki.syslinux.org/wiki/index.php?title=Config>`__ for more +information). diff --git a/docs/system/target-s390x.rst b/docs/system/target-s390x.rst index 644e404ef9..c636f64113 100644 --- a/docs/system/target-s390x.rst +++ b/docs/system/target-s390x.rst @@ -31,4 +31,5 @@ Architectural features ====================== .. toctree:: + s390x/bootdevices s390x/protvirt diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c index 9e31ab2da4..39b1f74442 100644 --- a/hw/acpi/pcihp.c +++ b/hw/acpi/pcihp.c @@ -104,6 +104,24 @@ static void acpi_set_pci_info(void) } } +static void acpi_pcihp_disable_root_bus(void) +{ + static bool root_hp_disabled; + PCIBus *bus; + + if (root_hp_disabled) { + return; + } + + bus = find_i440fx(); + if (bus) { + /* setting the hotplug handler to NULL makes the bus non-hotpluggable */ + qbus_set_hotplug_handler(BUS(bus), NULL); + } + root_hp_disabled = true; + return; +} + static void acpi_pcihp_test_hotplug_bus(PCIBus *bus, void *opaque) { AcpiPciHpFind *find = opaque; @@ -209,8 +227,11 @@ static void acpi_pcihp_update(AcpiPciHpState *s) } } -void acpi_pcihp_reset(AcpiPciHpState *s) +void acpi_pcihp_reset(AcpiPciHpState *s, bool acpihp_root_off) { + if (acpihp_root_off) { + acpi_pcihp_disable_root_bus(); + } acpi_set_pci_info(); acpi_pcihp_update(s); } diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index 26bac4f16c..e6163bb6ce 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -78,6 +78,7 @@ typedef struct PIIX4PMState { AcpiPciHpState acpi_pci_hotplug; bool 
use_acpi_hotplug_bridge; + bool use_acpi_root_pci_hotplug; uint8_t disable_s3; uint8_t disable_s4; @@ -324,7 +325,7 @@ static void piix4_pm_reset(DeviceState *dev) pci_conf[0x5B] = 0x02; } pm_io_space_update(s); - acpi_pcihp_reset(&s->acpi_pci_hotplug); + acpi_pcihp_reset(&s->acpi_pci_hotplug, !s->use_acpi_root_pci_hotplug); } static void piix4_pm_powerdown_req(Notifier *n, void *opaque) @@ -635,6 +636,8 @@ static Property piix4_pm_properties[] = { DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_VAL, PIIX4PMState, s4_val, 2), DEFINE_PROP_BOOL("acpi-pci-hotplug-with-bridge-support", PIIX4PMState, use_acpi_hotplug_bridge, true), + DEFINE_PROP_BOOL("acpi-root-pci-hotplug", PIIX4PMState, + use_acpi_root_pci_hotplug, true), DEFINE_PROP_BOOL("memory-hotplug-support", PIIX4PMState, acpi_memory_hotplug.is_enabled, true), DEFINE_PROP_END_OF_LIST(), diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c index 91f0df7b13..0a482ff6f7 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -170,7 +170,7 @@ static void acpi_dsdt_add_pci(Aml *scope, const MemMapEntry *memmap, aml_append(dev, aml_name_decl("_CID", aml_string("PNP0A03"))); aml_append(dev, aml_name_decl("_SEG", aml_int(0))); aml_append(dev, aml_name_decl("_BBN", aml_int(0))); - aml_append(dev, aml_name_decl("_UID", aml_string("PCI0"))); + aml_append(dev, aml_name_decl("_UID", aml_int(0))); aml_append(dev, aml_name_decl("_STR", aml_unicode("PCIe 0 Device"))); aml_append(dev, aml_name_decl("_CCA", aml_int(1))); diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index a00b854736..39aec42dae 100644 --- a/hw/block/vhost-user-blk.c +++ b/hw/block/vhost-user-blk.c @@ -420,6 +420,9 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) return; } + if (s->num_queues == VHOST_USER_BLK_AUTO_NUM_QUEUES) { + s->num_queues = 1; + } if (!s->num_queues || s->num_queues > VIRTIO_QUEUE_MAX) { error_setg(errp, "vhost-user-blk: invalid number of IO queues"); return; @@ -531,7 +534,8 @@ 
static const VMStateDescription vmstate_vhost_user_blk = { static Property vhost_user_blk_properties[] = { DEFINE_PROP_CHR("chardev", VHostUserBlk, chardev), - DEFINE_PROP_UINT16("num-queues", VHostUserBlk, num_queues, 1), + DEFINE_PROP_UINT16("num-queues", VHostUserBlk, num_queues, + VHOST_USER_BLK_AUTO_NUM_QUEUES), DEFINE_PROP_UINT32("queue-size", VHostUserBlk, queue_size, 128), DEFINE_PROP_BIT("config-wce", VHostUserBlk, config_wce, 0, true), DEFINE_PROP_END_OF_LIST(), diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 413783693c..2204ba149e 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -1147,6 +1147,9 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) error_setg(errp, "Device needs media, but drive is empty"); return; } + if (conf->num_queues == VIRTIO_BLK_AUTO_NUM_QUEUES) { + conf->num_queues = 1; + } if (!conf->num_queues) { error_setg(errp, "num-queues property must be larger than 0"); return; @@ -1281,7 +1284,8 @@ static Property virtio_blk_properties[] = { #endif DEFINE_PROP_BIT("request-merging", VirtIOBlock, conf.request_merging, 0, true), - DEFINE_PROP_UINT16("num-queues", VirtIOBlock, conf.num_queues, 1), + DEFINE_PROP_UINT16("num-queues", VirtIOBlock, conf.num_queues, + VIRTIO_BLK_AUTO_NUM_QUEUES), DEFINE_PROP_UINT16("queue-size", VirtIOBlock, conf.queue_size, 256), DEFINE_PROP_BOOL("seg-max-adjust", VirtIOBlock, conf.seg_max_adjust, true), DEFINE_PROP_LINK("iothread", VirtIOBlock, conf.iothread, TYPE_IOTHREAD, diff --git a/hw/core/machine.c b/hw/core/machine.c index cf5f2dfaeb..ea26d61237 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -28,7 +28,13 @@ #include "hw/mem/nvdimm.h" #include "migration/vmstate.h" -GlobalProperty hw_compat_5_1[] = {}; +GlobalProperty hw_compat_5_1[] = { + { "vhost-scsi", "num_queues", "1"}, + { "vhost-user-blk", "num-queues", "1"}, + { "vhost-user-scsi", "num_queues", "1"}, + { "virtio-blk-device", "num-queues", "1"}, + { "virtio-scsi-device", "num_queues", 
"1"}, +}; const size_t hw_compat_5_1_len = G_N_ELEMENTS(hw_compat_5_1); GlobalProperty hw_compat_5_0[] = { diff --git a/hw/display/artist.c b/hw/display/artist.c index 6261bfe65b..71982559c6 100644 --- a/hw/display/artist.c +++ b/hw/display/artist.c @@ -35,9 +35,9 @@ struct vram_buffer { MemoryRegion mr; uint8_t *data; - int size; - int width; - int height; + unsigned int size; + unsigned int width; + unsigned int height; }; typedef struct ARTISTState { @@ -206,7 +206,12 @@ static void artist_invalidate_lines(struct vram_buffer *buf, int starty, int height) { int start = starty * buf->width; - int size = height * buf->width; + int size; + + if (starty + height > buf->height) + height = buf->height - starty; + + size = height * buf->width; if (start + size <= buf->size) { memory_region_set_dirty(&buf->mr, start, size); @@ -273,11 +278,20 @@ static artist_rop_t artist_get_op(ARTISTState *s) return (s->image_bitmap_op >> 8) & 0xf; } -static void artist_rop8(ARTISTState *s, uint8_t *dst, uint8_t val) +static void artist_rop8(ARTISTState *s, struct vram_buffer *buf, + unsigned int offset, uint8_t val) { - const artist_rop_t op = artist_get_op(s); - uint8_t plane_mask = s->plane_mask & 0xff; + uint8_t plane_mask; + uint8_t *dst; + + if (offset >= buf->size) { + qemu_log_mask(LOG_GUEST_ERROR, + "rop8 offset:%u bufsize:%u\n", offset, buf->size); + return; + } + dst = buf->data + offset; + plane_mask = s->plane_mask & 0xff; switch (op) { case ARTIST_ROP_CLEAR: @@ -285,8 +299,7 @@ static void artist_rop8(ARTISTState *s, uint8_t *dst, uint8_t val) break; case ARTIST_ROP_COPY: - *dst &= ~plane_mask; - *dst |= val & plane_mask; + *dst = (*dst & ~plane_mask) | (val & plane_mask); break; case ARTIST_ROP_XOR: @@ -340,14 +353,14 @@ static void vram_bit_write(ARTISTState *s, int posx, int posy, bool incr_x, { struct vram_buffer *buf; uint32_t vram_bitmask = s->vram_bitmask; - int mask, i, pix_count, pix_length, offset, height, width; + int mask, i, pix_count, pix_length; + unsigned 
int offset, width; uint8_t *data8, *p; pix_count = vram_write_pix_per_transfer(s); pix_length = vram_pixel_length(s); buf = vram_write_buffer(s); - height = buf->height; width = buf->width; if (s->cmap_bm_access) { @@ -356,8 +369,7 @@ static void vram_bit_write(ARTISTState *s, int posx, int posy, bool incr_x, offset = posy * width + posx; } - if (!buf->size) { - qemu_log("write to non-existent buffer\n"); + if (!buf->size || offset >= buf->size) { return; } @@ -367,13 +379,6 @@ static void vram_bit_write(ARTISTState *s, int posx, int posy, bool incr_x, pix_count = size * 8; } - if (posy * width + posx + pix_count > buf->size) { - qemu_log("write outside bounds: wants %dx%d, max size %dx%d\n", - posx, posy, width, height); - return; - } - - switch (pix_length) { case 0: if (s->image_bitmap_op & 0x20000000) { @@ -381,8 +386,11 @@ static void vram_bit_write(ARTISTState *s, int posx, int posy, bool incr_x, } for (i = 0; i < pix_count; i++) { - artist_rop8(s, p + offset + pix_count - 1 - i, - (data & 1) ? (s->plane_mask >> 24) : 0); + uint32_t off = offset + pix_count - 1 - i; + if (off < buf->size) { + artist_rop8(s, buf, off, + (data & 1) ? 
(s->plane_mask >> 24) : 0); + } data >>= 1; } memory_region_set_dirty(&buf->mr, offset, pix_count); @@ -390,7 +398,9 @@ static void vram_bit_write(ARTISTState *s, int posx, int posy, bool incr_x, case 3: if (s->cmap_bm_access) { - *(uint32_t *)(p + offset) = data; + if (offset + 3 < buf->size) { + *(uint32_t *)(p + offset) = data; + } break; } data8 = (uint8_t *)&data; @@ -398,7 +408,10 @@ static void vram_bit_write(ARTISTState *s, int posx, int posy, bool incr_x, for (i = 3; i >= 0; i--) { if (!(s->image_bitmap_op & 0x20000000) || s->vram_bitmask & (1 << (28 + i))) { - artist_rop8(s, p + offset + 3 - i, data8[ROP8OFF(i)]); + uint32_t off = offset + 3 - i; + if (off < buf->size) { + artist_rop8(s, buf, off, data8[ROP8OFF(i)]); + } } } memory_region_set_dirty(&buf->mr, offset, 3); @@ -420,16 +433,16 @@ static void vram_bit_write(ARTISTState *s, int posx, int posy, bool incr_x, break; } - for (i = 0; i < pix_count; i++) { + for (i = 0; i < pix_count && offset + i < buf->size; i++) { mask = 1 << (pix_count - 1 - i); if (!(s->image_bitmap_op & 0x20000000) || (vram_bitmask & mask)) { if (data & mask) { - artist_rop8(s, p + offset + i, s->fg_color); + artist_rop8(s, buf, offset + i, s->fg_color); } else { if (!(s->image_bitmap_op & 0x10000002)) { - artist_rop8(s, p + offset + i, s->bg_color); + artist_rop8(s, buf, offset + i, s->bg_color); } } } @@ -457,12 +470,14 @@ static void vram_bit_write(ARTISTState *s, int posx, int posy, bool incr_x, } } -static void block_move(ARTISTState *s, int source_x, int source_y, int dest_x, - int dest_y, int width, int height) +static void block_move(ARTISTState *s, + unsigned int source_x, unsigned int source_y, + unsigned int dest_x, unsigned int dest_y, + unsigned int width, unsigned int height) { struct vram_buffer *buf; int line, endline, lineincr, startcolumn, endcolumn, columnincr, column; - uint32_t dst, src; + unsigned int dst, src; trace_artist_block_move(source_x, source_y, dest_x, dest_y, width, height); @@ -474,6 +489,12 @@ 
static void block_move(ARTISTState *s, int source_x, int source_y, int dest_x, } buf = &s->vram_buffer[ARTIST_BUFFER_AP]; + if (height > buf->height) { + height = buf->height; + } + if (width > buf->width) { + width = buf->width; + } if (dest_y > source_y) { /* move down */ @@ -500,24 +521,27 @@ static void block_move(ARTISTState *s, int source_x, int source_y, int dest_x, } for ( ; line != endline; line += lineincr) { - src = source_x + ((line + source_y) * buf->width); - dst = dest_x + ((line + dest_y) * buf->width); + src = source_x + ((line + source_y) * buf->width) + startcolumn; + dst = dest_x + ((line + dest_y) * buf->width) + startcolumn; for (column = startcolumn; column != endcolumn; column += columnincr) { - if (dst + column > buf->size || src + column > buf->size) { + if (dst >= buf->size || src >= buf->size) { continue; } - artist_rop8(s, buf->data + dst + column, buf->data[src + column]); + artist_rop8(s, buf, dst, buf->data[src]); + src += columnincr; + dst += columnincr; } } artist_invalidate_lines(buf, dest_y, height); } -static void fill_window(ARTISTState *s, int startx, int starty, - int width, int height) +static void fill_window(ARTISTState *s, + unsigned int startx, unsigned int starty, + unsigned int width, unsigned int height) { - uint32_t offset; + unsigned int offset; uint8_t color = artist_get_color(s); struct vram_buffer *buf; int x, y; @@ -548,23 +572,30 @@ static void fill_window(ARTISTState *s, int startx, int starty, offset = y * s->width; for (x = startx; x < startx + width; x++) { - artist_rop8(s, buf->data + offset + x, color); + artist_rop8(s, buf, offset + x, color); } } artist_invalidate_lines(buf, starty, height); } -static void draw_line(ARTISTState *s, int x1, int y1, int x2, int y2, +static void draw_line(ARTISTState *s, + unsigned int x1, unsigned int y1, + unsigned int x2, unsigned int y2, bool update_start, int skip_pix, int max_pix) { - struct vram_buffer *buf; + struct vram_buffer *buf = 
&s->vram_buffer[ARTIST_BUFFER_AP]; uint8_t color; int dx, dy, t, e, x, y, incy, diago, horiz; bool c1; - uint8_t *p; trace_artist_draw_line(x1, y1, x2, y2); + if ((x1 >= buf->width && x2 >= buf->width) || + (y1 >= buf->height && y2 >= buf->height)) { + return; + } + + if (update_start) { s->vram_start = (x2 << 16) | y2; } @@ -579,9 +610,6 @@ static void draw_line(ARTISTState *s, int x1, int y1, int x2, int y2, } else { dy = y1 - y2; } - if (!dx || !dy) { - return; - } c1 = false; if (dy > dx) { @@ -622,23 +650,23 @@ static void draw_line(ARTISTState *s, int x1, int y1, int x2, int y2, x = x1; y = y1; color = artist_get_color(s); - buf = &s->vram_buffer[ARTIST_BUFFER_AP]; do { + unsigned int ofs; + if (c1) { - p = buf->data + x * s->width + y; + ofs = x * s->width + y; } else { - p = buf->data + y * s->width + x; + ofs = y * s->width + x; } if (skip_pix > 0) { skip_pix--; } else { - artist_rop8(s, p, color); + artist_rop8(s, buf, ofs, color); } if (e > 0) { - artist_invalidate_lines(buf, y, 1); y += incy; e += diago; } else { @@ -646,6 +674,10 @@ static void draw_line(ARTISTState *s, int x1, int y1, int x2, int y2, } x++; } while (x <= x2 && (max_pix == -1 || --max_pix > 0)); + if (c1) + artist_invalidate_lines(buf, x, dy+1); + else + artist_invalidate_lines(buf, y, dx+1); } static void draw_line_pattern_start(ARTISTState *s) @@ -755,23 +787,24 @@ static void font_write16(ARTISTState *s, uint16_t val) uint16_t mask; int i; - int startx = artist_get_x(s->vram_start); - int starty = artist_get_y(s->vram_start) + s->font_write_pos_y; - int offset = starty * s->width + startx; + unsigned int startx = artist_get_x(s->vram_start); + unsigned int starty = artist_get_y(s->vram_start) + s->font_write_pos_y; + unsigned int offset = starty * s->width + startx; buf = &s->vram_buffer[ARTIST_BUFFER_AP]; - if (offset + 16 > buf->size) { + if (startx >= buf->width || starty >= buf->height || + offset + 16 >= buf->size) { return; } for (i = 0; i < 16; i++) { mask = 1 << (15 - i); if 
(val & mask) { - artist_rop8(s, buf->data + offset + i, color); + artist_rop8(s, buf, offset + i, color); } else { if (!(s->image_bitmap_op & 0x20000000)) { - artist_rop8(s, buf->data + offset + i, s->bg_color); + artist_rop8(s, buf, offset + i, s->bg_color); } } } @@ -1125,7 +1158,7 @@ static void artist_vram_write(void *opaque, hwaddr addr, uint64_t val, struct vram_buffer *buf; int posy = (addr >> 11) & 0x3ff; int posx = addr & 0x7ff; - uint32_t offset; + unsigned int offset; trace_artist_vram_write(size, addr, val); if (s->cmap_bm_access) { @@ -1146,18 +1179,28 @@ static void artist_vram_write(void *opaque, hwaddr addr, uint64_t val, } offset = posy * buf->width + posx; + if (offset >= buf->size) { + return; + } + switch (size) { case 4: - *(uint32_t *)(buf->data + offset) = be32_to_cpu(val); - memory_region_set_dirty(&buf->mr, offset, 4); + if (offset + 3 < buf->size) { + *(uint32_t *)(buf->data + offset) = be32_to_cpu(val); + memory_region_set_dirty(&buf->mr, offset, 4); + } break; case 2: - *(uint16_t *)(buf->data + offset) = be16_to_cpu(val); - memory_region_set_dirty(&buf->mr, offset, 2); + if (offset + 1 < buf->size) { + *(uint16_t *)(buf->data + offset) = be16_to_cpu(val); + memory_region_set_dirty(&buf->mr, offset, 2); + } break; case 1: - *(uint8_t *)(buf->data + offset) = val; - memory_region_set_dirty(&buf->mr, offset, 1); + if (offset < buf->size) { + *(uint8_t *)(buf->data + offset) = val; + memory_region_set_dirty(&buf->mr, offset, 1); + } break; default: break; @@ -1173,9 +1216,12 @@ static uint64_t artist_vram_read(void *opaque, hwaddr addr, unsigned size) if (s->cmap_bm_access) { buf = &s->vram_buffer[ARTIST_BUFFER_CMAP]; - val = *(uint32_t *)(buf->data + addr); + val = 0; + if (addr < buf->size && addr + 3 < buf->size) { + val = *(uint32_t *)(buf->data + addr); + } trace_artist_vram_read(size, addr, 0, 0, val); - return 0; + return val; } buf = vram_read_buffer(s); @@ -1199,20 +1245,16 @@ static const MemoryRegionOps artist_reg_ops = { .read = 
artist_reg_read, .write = artist_reg_write, .endianness = DEVICE_NATIVE_ENDIAN, - .valid = { - .min_access_size = 1, - .max_access_size = 4, - }, + .impl.min_access_size = 1, + .impl.max_access_size = 4, }; static const MemoryRegionOps artist_vram_ops = { .read = artist_vram_read, .write = artist_vram_write, .endianness = DEVICE_NATIVE_ENDIAN, - .valid = { - .min_access_size = 1, - .max_access_size = 4, - }, + .impl.min_access_size = 1, + .impl.max_access_size = 4, }; static void artist_draw_cursor(ARTISTState *s) diff --git a/hw/hppa/hppa_hardware.h b/hw/hppa/hppa_hardware.h index 4a2fe2df60..cdb7fa6240 100644 --- a/hw/hppa/hppa_hardware.h +++ b/hw/hppa/hppa_hardware.h @@ -17,6 +17,7 @@ #define LASI_UART_HPA 0xffd05000 #define LASI_SCSI_HPA 0xffd06000 #define LASI_LAN_HPA 0xffd07000 +#define LASI_RTC_HPA 0xffd09000 #define LASI_LPT_HPA 0xffd02000 #define LASI_AUDIO_HPA 0xffd04000 #define LASI_PS2KBD_HPA 0xffd08000 @@ -37,10 +38,15 @@ #define PORT_PCI_CMD (PCI_HPA + DINO_PCI_ADDR) #define PORT_PCI_DATA (PCI_HPA + DINO_CONFIG_DATA) +/* QEMU fw_cfg interface port */ +#define QEMU_FW_CFG_IO_BASE (MEMORY_HPA + 0x80) + #define PORT_SERIAL1 (DINO_UART_HPA + 0x800) #define PORT_SERIAL2 (LASI_UART_HPA + 0x800) #define HPPA_MAX_CPUS 8 /* max. number of SMP CPUs */ #define CPU_CLOCK_MHZ 250 /* emulate a 250 MHz CPU */ +#define CPU_HPA_CR_REG 7 /* store CPU HPA in cr7 (SeaBIOS internal) */ + #endif diff --git a/hw/hppa/lasi.c b/hw/hppa/lasi.c index 19974034f3..194aa3e619 100644 --- a/hw/hppa/lasi.c +++ b/hw/hppa/lasi.c @@ -11,6 +11,7 @@ #include "qemu/osdep.h" #include "qemu/units.h" +#include "qemu/log.h" #include "qapi/error.h" #include "cpu.h" #include "trace.h" @@ -54,8 +55,6 @@ #define LASI_CHIP(obj) \ OBJECT_CHECK(LasiState, (obj), TYPE_LASI_CHIP) -#define LASI_RTC_HPA (LASI_HPA + 0x9000) - typedef struct LasiState { PCIHostState parent_obj; @@ -172,8 +171,11 @@ static MemTxResult lasi_chip_write_with_attrs(void *opaque, hwaddr addr, /* read-only. 
*/ break; case LASI_IMR: - s->imr = val; /* 0x20 ?? */ - assert((val & LASI_IRQ_BITS) == val); + s->imr = val; + if (((val & LASI_IRQ_BITS) != val) && (val != 0xffffffff)) + qemu_log_mask(LOG_GUEST_ERROR, + "LASI: tried to set invalid %lx IMR value.\n", + (unsigned long) val); break; case LASI_IPR: /* Any write to IPR clears the register. */ diff --git a/hw/hppa/machine.c b/hw/hppa/machine.c index 49155537cd..90aeefe2a4 100644 --- a/hw/hppa/machine.c +++ b/hw/hppa/machine.c @@ -25,6 +25,8 @@ #define MAX_IDE_BUS 2 +#define MIN_SEABIOS_HPPA_VERSION 1 /* require at least this fw version */ + static ISABus *hppa_isa_bus(void) { ISABus *isa_bus; @@ -56,6 +58,23 @@ static uint64_t cpu_hppa_to_phys(void *opaque, uint64_t addr) static HPPACPU *cpu[HPPA_MAX_CPUS]; static uint64_t firmware_entry; +static FWCfgState *create_fw_cfg(MachineState *ms) +{ + FWCfgState *fw_cfg; + uint64_t val; + + fw_cfg = fw_cfg_init_mem(QEMU_FW_CFG_IO_BASE, QEMU_FW_CFG_IO_BASE + 4); + fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, ms->smp.cpus); + fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, HPPA_MAX_CPUS); + fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, ram_size); + + val = cpu_to_le64(MIN_SEABIOS_HPPA_VERSION); + fw_cfg_add_file(fw_cfg, "/etc/firmware-min-version", + g_memdup(&val, sizeof(val)), sizeof(val)); + + return fw_cfg; +} + static void machine_hppa_init(MachineState *machine) { const char *kernel_filename = machine->kernel_filename; @@ -118,6 +137,9 @@ static void machine_hppa_init(MachineState *machine) 115200, serial_hd(0), DEVICE_BIG_ENDIAN); } + /* fw_cfg configuration interface */ + create_fw_cfg(machine); + /* SCSI disk setup. 
*/ dev = DEVICE(pci_create_simple(pci_bus, -1, "lsi53c895a")); lsi53c8xx_handle_legacy_cmdline(dev); diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index b7bcbbbb2a..7a5a8b3521 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -1497,7 +1497,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, dev = aml_device("PCI0"); aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A03"))); aml_append(dev, aml_name_decl("_ADR", aml_int(0))); - aml_append(dev, aml_name_decl("_UID", aml_int(1))); + aml_append(dev, aml_name_decl("_UID", aml_int(0))); aml_append(sb_scope, dev); aml_append(dsdt, sb_scope); @@ -1512,7 +1512,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A08"))); aml_append(dev, aml_name_decl("_CID", aml_eisaid("PNP0A03"))); aml_append(dev, aml_name_decl("_ADR", aml_int(0))); - aml_append(dev, aml_name_decl("_UID", aml_int(1))); + aml_append(dev, aml_name_decl("_UID", aml_int(0))); aml_append(dev, build_q35_osc_method()); aml_append(sb_scope, dev); aml_append(dsdt, sb_scope); diff --git a/hw/riscv/meson.build b/hw/riscv/meson.build index 2de8e5a2fe..25af9db75e 100644 --- a/hw/riscv/meson.build +++ b/hw/riscv/meson.build @@ -1,5 +1,6 @@ riscv_ss = ss.source_set() riscv_ss.add(files('boot.c')) +riscv_ss.add(files('numa.c')) riscv_ss.add(when: 'CONFIG_HART', if_true: files('riscv_hart.c')) riscv_ss.add(when: 'CONFIG_OPENTITAN', if_true: files('opentitan.c')) riscv_ss.add(when: 'CONFIG_RISCV_VIRT', if_true: files('virt.c')) diff --git a/hw/riscv/numa.c b/hw/riscv/numa.c new file mode 100644 index 0000000000..4f92307102 --- /dev/null +++ b/hw/riscv/numa.c @@ -0,0 +1,242 @@ +/* + * QEMU RISC-V NUMA Helper + * + * Copyright (c) 2020 Western Digital Corporation or its affiliates. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "qemu/log.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "hw/boards.h" +#include "hw/qdev-properties.h" +#include "hw/riscv/numa.h" +#include "sysemu/device_tree.h" + +static bool numa_enabled(const MachineState *ms) +{ + return (ms->numa_state && ms->numa_state->num_nodes) ? true : false; +} + +int riscv_socket_count(const MachineState *ms) +{ + return (numa_enabled(ms)) ? ms->numa_state->num_nodes : 1; +} + +int riscv_socket_first_hartid(const MachineState *ms, int socket_id) +{ + int i, first_hartid = ms->smp.cpus; + + if (!numa_enabled(ms)) { + return (!socket_id) ? 0 : -1; + } + + for (i = 0; i < ms->smp.cpus; i++) { + if (ms->possible_cpus->cpus[i].props.node_id != socket_id) { + continue; + } + if (i < first_hartid) { + first_hartid = i; + } + } + + return (first_hartid < ms->smp.cpus) ? first_hartid : -1; +} + +int riscv_socket_last_hartid(const MachineState *ms, int socket_id) +{ + int i, last_hartid = -1; + + if (!numa_enabled(ms)) { + return (!socket_id) ? ms->smp.cpus - 1 : -1; + } + + for (i = 0; i < ms->smp.cpus; i++) { + if (ms->possible_cpus->cpus[i].props.node_id != socket_id) { + continue; + } + if (i > last_hartid) { + last_hartid = i; + } + } + + return (last_hartid < ms->smp.cpus) ? 
last_hartid : -1; +} + +int riscv_socket_hart_count(const MachineState *ms, int socket_id) +{ + int first_hartid, last_hartid; + + if (!numa_enabled(ms)) { + return (!socket_id) ? ms->smp.cpus : -1; + } + + first_hartid = riscv_socket_first_hartid(ms, socket_id); + if (first_hartid < 0) { + return -1; + } + + last_hartid = riscv_socket_last_hartid(ms, socket_id); + if (last_hartid < 0) { + return -1; + } + + if (first_hartid > last_hartid) { + return -1; + } + + return last_hartid - first_hartid + 1; +} + +bool riscv_socket_check_hartids(const MachineState *ms, int socket_id) +{ + int i, first_hartid, last_hartid; + + if (!numa_enabled(ms)) { + return (!socket_id) ? true : false; + } + + first_hartid = riscv_socket_first_hartid(ms, socket_id); + if (first_hartid < 0) { + return false; + } + + last_hartid = riscv_socket_last_hartid(ms, socket_id); + if (last_hartid < 0) { + return false; + } + + for (i = first_hartid; i <= last_hartid; i++) { + if (ms->possible_cpus->cpus[i].props.node_id != socket_id) { + return false; + } + } + + return true; +} + +uint64_t riscv_socket_mem_offset(const MachineState *ms, int socket_id) +{ + int i; + uint64_t mem_offset = 0; + + if (!numa_enabled(ms)) { + return 0; + } + + for (i = 0; i < ms->numa_state->num_nodes; i++) { + if (i == socket_id) { + break; + } + mem_offset += ms->numa_state->nodes[i].node_mem; + } + + return (i == socket_id) ? mem_offset : 0; +} + +uint64_t riscv_socket_mem_size(const MachineState *ms, int socket_id) +{ + if (!numa_enabled(ms)) { + return (!socket_id) ? ms->ram_size : 0; + } + + return (socket_id < ms->numa_state->num_nodes) ? 
+ ms->numa_state->nodes[socket_id].node_mem : 0; +} + +void riscv_socket_fdt_write_id(const MachineState *ms, void *fdt, + const char *node_name, int socket_id) +{ + if (numa_enabled(ms)) { + qemu_fdt_setprop_cell(fdt, node_name, "numa-node-id", socket_id); + } +} + +void riscv_socket_fdt_write_distance_matrix(const MachineState *ms, void *fdt) +{ + int i, j, idx; + uint32_t *dist_matrix, dist_matrix_size; + + if (numa_enabled(ms) && ms->numa_state->have_numa_distance) { + dist_matrix_size = riscv_socket_count(ms) * riscv_socket_count(ms); + dist_matrix_size *= (3 * sizeof(uint32_t)); + dist_matrix = g_malloc0(dist_matrix_size); + + for (i = 0; i < riscv_socket_count(ms); i++) { + for (j = 0; j < riscv_socket_count(ms); j++) { + idx = (i * riscv_socket_count(ms) + j) * 3; + dist_matrix[idx + 0] = cpu_to_be32(i); + dist_matrix[idx + 1] = cpu_to_be32(j); + dist_matrix[idx + 2] = + cpu_to_be32(ms->numa_state->nodes[i].distance[j]); + } + } + + qemu_fdt_add_subnode(fdt, "/distance-map"); + qemu_fdt_setprop_string(fdt, "/distance-map", "compatible", + "numa-distance-map-v1"); + qemu_fdt_setprop(fdt, "/distance-map", "distance-matrix", + dist_matrix, dist_matrix_size); + g_free(dist_matrix); + } +} + +CpuInstanceProperties +riscv_numa_cpu_index_to_props(MachineState *ms, unsigned cpu_index) +{ + MachineClass *mc = MACHINE_GET_CLASS(ms); + const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms); + + assert(cpu_index < possible_cpus->len); + return possible_cpus->cpus[cpu_index].props; +} + +int64_t riscv_numa_get_default_cpu_node_id(const MachineState *ms, int idx) +{ + int64_t nidx = 0; + + if (ms->numa_state->num_nodes) { + nidx = idx / (ms->smp.cpus / ms->numa_state->num_nodes); + if (ms->numa_state->num_nodes <= nidx) { + nidx = ms->numa_state->num_nodes - 1; + } + } + + return nidx; +} + +const CPUArchIdList *riscv_numa_possible_cpu_arch_ids(MachineState *ms) +{ + int n; + unsigned int max_cpus = ms->smp.max_cpus; + + if (ms->possible_cpus) { + 
assert(ms->possible_cpus->len == max_cpus); + return ms->possible_cpus; + } + + ms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) + + sizeof(CPUArchId) * max_cpus); + ms->possible_cpus->len = max_cpus; + for (n = 0; n < ms->possible_cpus->len; n++) { + ms->possible_cpus->cpus[n].type = ms->cpu_type; + ms->possible_cpus->cpus[n].arch_id = n; + ms->possible_cpus->cpus[n].props.has_core_id = true; + ms->possible_cpus->cpus[n].props.core_id = n; + } + + return ms->possible_cpus; +} diff --git a/hw/riscv/sifive_clint.c b/hw/riscv/sifive_clint.c index 669c21adc2..15e13d5f7a 100644 --- a/hw/riscv/sifive_clint.c +++ b/hw/riscv/sifive_clint.c @@ -79,7 +79,7 @@ static uint64_t sifive_clint_read(void *opaque, hwaddr addr, unsigned size) SiFiveCLINTState *clint = opaque; if (addr >= clint->sip_base && addr < clint->sip_base + (clint->num_harts << 2)) { - size_t hartid = (addr - clint->sip_base) >> 2; + size_t hartid = clint->hartid_base + ((addr - clint->sip_base) >> 2); CPUState *cpu = qemu_get_cpu(hartid); CPURISCVState *env = cpu ? cpu->env_ptr : NULL; if (!env) { @@ -92,7 +92,8 @@ static uint64_t sifive_clint_read(void *opaque, hwaddr addr, unsigned size) } } else if (addr >= clint->timecmp_base && addr < clint->timecmp_base + (clint->num_harts << 3)) { - size_t hartid = (addr - clint->timecmp_base) >> 3; + size_t hartid = clint->hartid_base + + ((addr - clint->timecmp_base) >> 3); CPUState *cpu = qemu_get_cpu(hartid); CPURISCVState *env = cpu ? cpu->env_ptr : NULL; if (!env) { @@ -129,7 +130,7 @@ static void sifive_clint_write(void *opaque, hwaddr addr, uint64_t value, if (addr >= clint->sip_base && addr < clint->sip_base + (clint->num_harts << 2)) { - size_t hartid = (addr - clint->sip_base) >> 2; + size_t hartid = clint->hartid_base + ((addr - clint->sip_base) >> 2); CPUState *cpu = qemu_get_cpu(hartid); CPURISCVState *env = cpu ? 
cpu->env_ptr : NULL; if (!env) { @@ -142,7 +143,8 @@ static void sifive_clint_write(void *opaque, hwaddr addr, uint64_t value, return; } else if (addr >= clint->timecmp_base && addr < clint->timecmp_base + (clint->num_harts << 3)) { - size_t hartid = (addr - clint->timecmp_base) >> 3; + size_t hartid = clint->hartid_base + + ((addr - clint->timecmp_base) >> 3); CPUState *cpu = qemu_get_cpu(hartid); CPURISCVState *env = cpu ? cpu->env_ptr : NULL; if (!env) { @@ -186,6 +188,7 @@ static const MemoryRegionOps sifive_clint_ops = { }; static Property sifive_clint_properties[] = { + DEFINE_PROP_UINT32("hartid-base", SiFiveCLINTState, hartid_base, 0), DEFINE_PROP_UINT32("num-harts", SiFiveCLINTState, num_harts, 0), DEFINE_PROP_UINT32("sip-base", SiFiveCLINTState, sip_base, 0), DEFINE_PROP_UINT32("timecmp-base", SiFiveCLINTState, timecmp_base, 0), @@ -227,13 +230,13 @@ type_init(sifive_clint_register_types) /* * Create CLINT device. */ -DeviceState *sifive_clint_create(hwaddr addr, hwaddr size, uint32_t num_harts, - uint32_t sip_base, uint32_t timecmp_base, uint32_t time_base, - bool provide_rdtime) +DeviceState *sifive_clint_create(hwaddr addr, hwaddr size, + uint32_t hartid_base, uint32_t num_harts, uint32_t sip_base, + uint32_t timecmp_base, uint32_t time_base, bool provide_rdtime) { int i; for (i = 0; i < num_harts; i++) { - CPUState *cpu = qemu_get_cpu(i); + CPUState *cpu = qemu_get_cpu(hartid_base + i); CPURISCVState *env = cpu ? 
cpu->env_ptr : NULL; if (!env) { continue; @@ -247,6 +250,7 @@ DeviceState *sifive_clint_create(hwaddr addr, hwaddr size, uint32_t num_harts, } DeviceState *dev = qdev_new(TYPE_SIFIVE_CLINT); + qdev_prop_set_uint32(dev, "hartid-base", hartid_base); qdev_prop_set_uint32(dev, "num-harts", num_harts); qdev_prop_set_uint32(dev, "sip-base", sip_base); qdev_prop_set_uint32(dev, "timecmp-base", timecmp_base); diff --git a/hw/riscv/sifive_e.c b/hw/riscv/sifive_e.c index c8b060486a..ca55cc438a 100644 --- a/hw/riscv/sifive_e.c +++ b/hw/riscv/sifive_e.c @@ -200,7 +200,7 @@ static void sifive_e_soc_realize(DeviceState *dev, Error **errp) /* MMIO */ s->plic = sifive_plic_create(memmap[SIFIVE_E_PLIC].base, - (char *)SIFIVE_E_PLIC_HART_CONFIG, + (char *)SIFIVE_E_PLIC_HART_CONFIG, 0, SIFIVE_E_PLIC_NUM_SOURCES, SIFIVE_E_PLIC_NUM_PRIORITIES, SIFIVE_E_PLIC_PRIORITY_BASE, @@ -211,7 +211,7 @@ static void sifive_e_soc_realize(DeviceState *dev, Error **errp) SIFIVE_E_PLIC_CONTEXT_STRIDE, memmap[SIFIVE_E_PLIC].size); sifive_clint_create(memmap[SIFIVE_E_CLINT].base, - memmap[SIFIVE_E_CLINT].size, ms->smp.cpus, + memmap[SIFIVE_E_CLINT].size, 0, ms->smp.cpus, SIFIVE_SIP_BASE, SIFIVE_TIMECMP_BASE, SIFIVE_TIME_BASE, false); create_unimplemented_device("riscv.sifive.e.aon", memmap[SIFIVE_E_AON].base, memmap[SIFIVE_E_AON].size); diff --git a/hw/riscv/sifive_plic.c b/hw/riscv/sifive_plic.c index c20c192034..11ef147606 100644 --- a/hw/riscv/sifive_plic.c +++ b/hw/riscv/sifive_plic.c @@ -361,6 +361,7 @@ static const MemoryRegionOps sifive_plic_ops = { static Property sifive_plic_properties[] = { DEFINE_PROP_STRING("hart-config", SiFivePLICState, hart_config), + DEFINE_PROP_UINT32("hartid-base", SiFivePLICState, hartid_base, 0), DEFINE_PROP_UINT32("num-sources", SiFivePLICState, num_sources, 0), DEFINE_PROP_UINT32("num-priorities", SiFivePLICState, num_priorities, 0), DEFINE_PROP_UINT32("priority-base", SiFivePLICState, priority_base, 0), @@ -409,10 +410,12 @@ static void 
parse_hart_config(SiFivePLICState *plic) } hartid++; - /* store hart/mode combinations */ plic->num_addrs = addrid; + plic->num_harts = hartid; + + /* store hart/mode combinations */ plic->addr_config = g_new(PLICAddr, plic->num_addrs); - addrid = 0, hartid = 0; + addrid = 0, hartid = plic->hartid_base; p = plic->hart_config; while ((c = *p++)) { if (c == ',') { @@ -438,8 +441,6 @@ static void sifive_plic_irq_request(void *opaque, int irq, int level) static void sifive_plic_realize(DeviceState *dev, Error **errp) { - MachineState *ms = MACHINE(qdev_get_machine()); - unsigned int smp_cpus = ms->smp.cpus; SiFivePLICState *plic = SIFIVE_PLIC(dev); int i; @@ -460,8 +461,8 @@ static void sifive_plic_realize(DeviceState *dev, Error **errp) * lost a interrupt in the case a PLIC is attached. The SEIP bit must be * hardware controlled when a PLIC is attached. */ - for (i = 0; i < smp_cpus; i++) { - RISCVCPU *cpu = RISCV_CPU(qemu_get_cpu(i)); + for (i = 0; i < plic->num_harts; i++) { + RISCVCPU *cpu = RISCV_CPU(qemu_get_cpu(plic->hartid_base + i)); if (riscv_cpu_claim_interrupts(cpu, MIP_SEIP) < 0) { error_report("SEIP already claimed"); exit(1); @@ -497,16 +498,17 @@ type_init(sifive_plic_register_types) * Create PLIC device. 
*/ DeviceState *sifive_plic_create(hwaddr addr, char *hart_config, - uint32_t num_sources, uint32_t num_priorities, - uint32_t priority_base, uint32_t pending_base, - uint32_t enable_base, uint32_t enable_stride, - uint32_t context_base, uint32_t context_stride, - uint32_t aperture_size) + uint32_t hartid_base, uint32_t num_sources, + uint32_t num_priorities, uint32_t priority_base, + uint32_t pending_base, uint32_t enable_base, + uint32_t enable_stride, uint32_t context_base, + uint32_t context_stride, uint32_t aperture_size) { DeviceState *dev = qdev_new(TYPE_SIFIVE_PLIC); assert(enable_stride == (enable_stride & -enable_stride)); assert(context_stride == (context_stride & -context_stride)); qdev_prop_set_string(dev, "hart-config", hart_config); + qdev_prop_set_uint32(dev, "hartid-base", hartid_base); qdev_prop_set_uint32(dev, "num-sources", num_sources); qdev_prop_set_uint32(dev, "num-priorities", num_priorities); qdev_prop_set_uint32(dev, "priority-base", priority_base); diff --git a/hw/riscv/sifive_u.c b/hw/riscv/sifive_u.c index 18301e6fa5..a48046c6a0 100644 --- a/hw/riscv/sifive_u.c +++ b/hw/riscv/sifive_u.c @@ -687,7 +687,7 @@ static void sifive_u_soc_realize(DeviceState *dev, Error **errp) /* MMIO */ s->plic = sifive_plic_create(memmap[SIFIVE_U_PLIC].base, - plic_hart_config, + plic_hart_config, 0, SIFIVE_U_PLIC_NUM_SOURCES, SIFIVE_U_PLIC_NUM_PRIORITIES, SIFIVE_U_PLIC_PRIORITY_BASE, @@ -703,7 +703,7 @@ static void sifive_u_soc_realize(DeviceState *dev, Error **errp) sifive_uart_create(system_memory, memmap[SIFIVE_U_UART1].base, serial_hd(1), qdev_get_gpio_in(DEVICE(s->plic), SIFIVE_U_UART1_IRQ)); sifive_clint_create(memmap[SIFIVE_U_CLINT].base, - memmap[SIFIVE_U_CLINT].size, ms->smp.cpus, + memmap[SIFIVE_U_CLINT].size, 0, ms->smp.cpus, SIFIVE_SIP_BASE, SIFIVE_TIMECMP_BASE, SIFIVE_TIME_BASE, false); if (!sysbus_realize(SYS_BUS_DEVICE(&s->prci), errp)) { diff --git a/hw/riscv/spike.c b/hw/riscv/spike.c index 13958bdbeb..56f5fe73c7 100644 --- 
a/hw/riscv/spike.c +++ b/hw/riscv/spike.c @@ -36,6 +36,7 @@ #include "hw/riscv/sifive_clint.h" #include "hw/riscv/spike.h" #include "hw/riscv/boot.h" +#include "hw/riscv/numa.h" #include "chardev/char.h" #include "sysemu/arch_init.h" #include "sysemu/device_tree.h" @@ -66,9 +67,14 @@ static void create_fdt(SpikeState *s, const struct MemmapEntry *memmap, uint64_t mem_size, const char *cmdline) { void *fdt; - int cpu; - uint32_t *cells; - char *nodename; + uint64_t addr, size; + unsigned long clint_addr; + int cpu, socket; + MachineState *mc = MACHINE(s); + uint32_t *clint_cells; + uint32_t cpu_phandle, intc_phandle, phandle = 1; + char *name, *mem_name, *clint_name, *clust_name; + char *core_name, *cpu_name, *intc_name; fdt = s->fdt = create_device_tree(&s->fdt_size); if (!fdt) { @@ -90,68 +96,91 @@ static void create_fdt(SpikeState *s, const struct MemmapEntry *memmap, qemu_fdt_setprop_cell(fdt, "/soc", "#size-cells", 0x2); qemu_fdt_setprop_cell(fdt, "/soc", "#address-cells", 0x2); - nodename = g_strdup_printf("/memory@%lx", - (long)memmap[SPIKE_DRAM].base); - qemu_fdt_add_subnode(fdt, nodename); - qemu_fdt_setprop_cells(fdt, nodename, "reg", - memmap[SPIKE_DRAM].base >> 32, memmap[SPIKE_DRAM].base, - mem_size >> 32, mem_size); - qemu_fdt_setprop_string(fdt, nodename, "device_type", "memory"); - g_free(nodename); - qemu_fdt_add_subnode(fdt, "/cpus"); qemu_fdt_setprop_cell(fdt, "/cpus", "timebase-frequency", SIFIVE_CLINT_TIMEBASE_FREQ); qemu_fdt_setprop_cell(fdt, "/cpus", "#size-cells", 0x0); qemu_fdt_setprop_cell(fdt, "/cpus", "#address-cells", 0x1); + qemu_fdt_add_subnode(fdt, "/cpus/cpu-map"); + + for (socket = (riscv_socket_count(mc) - 1); socket >= 0; socket--) { + clust_name = g_strdup_printf("/cpus/cpu-map/cluster%d", socket); + qemu_fdt_add_subnode(fdt, clust_name); + + clint_cells = g_new0(uint32_t, s->soc[socket].num_harts * 4); - for (cpu = s->soc.num_harts - 1; cpu >= 0; cpu--) { - nodename = g_strdup_printf("/cpus/cpu@%d", cpu); - char *intc = 
g_strdup_printf("/cpus/cpu@%d/interrupt-controller", cpu); - char *isa = riscv_isa_string(&s->soc.harts[cpu]); - qemu_fdt_add_subnode(fdt, nodename); + for (cpu = s->soc[socket].num_harts - 1; cpu >= 0; cpu--) { + cpu_phandle = phandle++; + + cpu_name = g_strdup_printf("/cpus/cpu@%d", + s->soc[socket].hartid_base + cpu); + qemu_fdt_add_subnode(fdt, cpu_name); #if defined(TARGET_RISCV32) - qemu_fdt_setprop_string(fdt, nodename, "mmu-type", "riscv,sv32"); + qemu_fdt_setprop_string(fdt, cpu_name, "mmu-type", "riscv,sv32"); #else - qemu_fdt_setprop_string(fdt, nodename, "mmu-type", "riscv,sv48"); + qemu_fdt_setprop_string(fdt, cpu_name, "mmu-type", "riscv,sv48"); #endif - qemu_fdt_setprop_string(fdt, nodename, "riscv,isa", isa); - qemu_fdt_setprop_string(fdt, nodename, "compatible", "riscv"); - qemu_fdt_setprop_string(fdt, nodename, "status", "okay"); - qemu_fdt_setprop_cell(fdt, nodename, "reg", cpu); - qemu_fdt_setprop_string(fdt, nodename, "device_type", "cpu"); - qemu_fdt_add_subnode(fdt, intc); - qemu_fdt_setprop_cell(fdt, intc, "phandle", 1); - qemu_fdt_setprop_string(fdt, intc, "compatible", "riscv,cpu-intc"); - qemu_fdt_setprop(fdt, intc, "interrupt-controller", NULL, 0); - qemu_fdt_setprop_cell(fdt, intc, "#interrupt-cells", 1); - g_free(isa); - g_free(intc); - g_free(nodename); - } + name = riscv_isa_string(&s->soc[socket].harts[cpu]); + qemu_fdt_setprop_string(fdt, cpu_name, "riscv,isa", name); + g_free(name); + qemu_fdt_setprop_string(fdt, cpu_name, "compatible", "riscv"); + qemu_fdt_setprop_string(fdt, cpu_name, "status", "okay"); + qemu_fdt_setprop_cell(fdt, cpu_name, "reg", + s->soc[socket].hartid_base + cpu); + qemu_fdt_setprop_string(fdt, cpu_name, "device_type", "cpu"); + riscv_socket_fdt_write_id(mc, fdt, cpu_name, socket); + qemu_fdt_setprop_cell(fdt, cpu_name, "phandle", cpu_phandle); - cells = g_new0(uint32_t, s->soc.num_harts * 4); - for (cpu = 0; cpu < s->soc.num_harts; cpu++) { - nodename = - g_strdup_printf("/cpus/cpu@%d/interrupt-controller", 
cpu); - uint32_t intc_phandle = qemu_fdt_get_phandle(fdt, nodename); - cells[cpu * 4 + 0] = cpu_to_be32(intc_phandle); - cells[cpu * 4 + 1] = cpu_to_be32(IRQ_M_SOFT); - cells[cpu * 4 + 2] = cpu_to_be32(intc_phandle); - cells[cpu * 4 + 3] = cpu_to_be32(IRQ_M_TIMER); - g_free(nodename); + intc_name = g_strdup_printf("%s/interrupt-controller", cpu_name); + qemu_fdt_add_subnode(fdt, intc_name); + intc_phandle = phandle++; + qemu_fdt_setprop_cell(fdt, intc_name, "phandle", intc_phandle); + qemu_fdt_setprop_string(fdt, intc_name, "compatible", + "riscv,cpu-intc"); + qemu_fdt_setprop(fdt, intc_name, "interrupt-controller", NULL, 0); + qemu_fdt_setprop_cell(fdt, intc_name, "#interrupt-cells", 1); + + clint_cells[cpu * 4 + 0] = cpu_to_be32(intc_phandle); + clint_cells[cpu * 4 + 1] = cpu_to_be32(IRQ_M_SOFT); + clint_cells[cpu * 4 + 2] = cpu_to_be32(intc_phandle); + clint_cells[cpu * 4 + 3] = cpu_to_be32(IRQ_M_TIMER); + + core_name = g_strdup_printf("%s/core%d", clust_name, cpu); + qemu_fdt_add_subnode(fdt, core_name); + qemu_fdt_setprop_cell(fdt, core_name, "cpu", cpu_phandle); + + g_free(core_name); + g_free(intc_name); + g_free(cpu_name); + } + + addr = memmap[SPIKE_DRAM].base + riscv_socket_mem_offset(mc, socket); + size = riscv_socket_mem_size(mc, socket); + mem_name = g_strdup_printf("/memory@%lx", (long)addr); + qemu_fdt_add_subnode(fdt, mem_name); + qemu_fdt_setprop_cells(fdt, mem_name, "reg", + addr >> 32, addr, size >> 32, size); + qemu_fdt_setprop_string(fdt, mem_name, "device_type", "memory"); + riscv_socket_fdt_write_id(mc, fdt, mem_name, socket); + g_free(mem_name); + + clint_addr = memmap[SPIKE_CLINT].base + + (memmap[SPIKE_CLINT].size * socket); + clint_name = g_strdup_printf("/soc/clint@%lx", clint_addr); + qemu_fdt_add_subnode(fdt, clint_name); + qemu_fdt_setprop_string(fdt, clint_name, "compatible", "riscv,clint0"); + qemu_fdt_setprop_cells(fdt, clint_name, "reg", + 0x0, clint_addr, 0x0, memmap[SPIKE_CLINT].size); + qemu_fdt_setprop(fdt, clint_name, 
"interrupts-extended", + clint_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 4); + riscv_socket_fdt_write_id(mc, fdt, clint_name, socket); + + g_free(clint_name); + g_free(clint_cells); + g_free(clust_name); } - nodename = g_strdup_printf("/soc/clint@%lx", - (long)memmap[SPIKE_CLINT].base); - qemu_fdt_add_subnode(fdt, nodename); - qemu_fdt_setprop_string(fdt, nodename, "compatible", "riscv,clint0"); - qemu_fdt_setprop_cells(fdt, nodename, "reg", - 0x0, memmap[SPIKE_CLINT].base, - 0x0, memmap[SPIKE_CLINT].size); - qemu_fdt_setprop(fdt, nodename, "interrupts-extended", - cells, s->soc.num_harts * sizeof(uint32_t) * 4); - g_free(cells); - g_free(nodename); + + riscv_socket_fdt_write_distance_matrix(mc, fdt); if (cmdline) { qemu_fdt_add_subnode(fdt, "/chosen"); @@ -162,23 +191,59 @@ static void create_fdt(SpikeState *s, const struct MemmapEntry *memmap, static void spike_board_init(MachineState *machine) { const struct MemmapEntry *memmap = spike_memmap; - - SpikeState *s = g_new0(SpikeState, 1); + SpikeState *s = SPIKE_MACHINE(machine); MemoryRegion *system_memory = get_system_memory(); MemoryRegion *main_mem = g_new(MemoryRegion, 1); MemoryRegion *mask_rom = g_new(MemoryRegion, 1); - unsigned int smp_cpus = machine->smp.cpus; uint32_t fdt_load_addr; uint64_t kernel_entry; + char *soc_name; + int i, base_hartid, hart_count; - /* Initialize SOC */ - object_initialize_child(OBJECT(machine), "soc", &s->soc, - TYPE_RISCV_HART_ARRAY); - object_property_set_str(OBJECT(&s->soc), "cpu-type", machine->cpu_type, - &error_abort); - object_property_set_int(OBJECT(&s->soc), "num-harts", smp_cpus, - &error_abort); - sysbus_realize(SYS_BUS_DEVICE(&s->soc), &error_abort); + /* Check socket count limit */ + if (SPIKE_SOCKETS_MAX < riscv_socket_count(machine)) { + error_report("number of sockets/nodes should be less than %d", + SPIKE_SOCKETS_MAX); + exit(1); + } + + /* Initialize sockets */ + for (i = 0; i < riscv_socket_count(machine); i++) { + if 
(!riscv_socket_check_hartids(machine, i)) { + error_report("discontinuous hartids in socket%d", i); + exit(1); + } + + base_hartid = riscv_socket_first_hartid(machine, i); + if (base_hartid < 0) { + error_report("can't find hartid base for socket%d", i); + exit(1); + } + + hart_count = riscv_socket_hart_count(machine, i); + if (hart_count < 0) { + error_report("can't find hart count for socket%d", i); + exit(1); + } + + soc_name = g_strdup_printf("soc%d", i); + object_initialize_child(OBJECT(machine), soc_name, &s->soc[i], + TYPE_RISCV_HART_ARRAY); + g_free(soc_name); + object_property_set_str(OBJECT(&s->soc[i]), "cpu-type", + machine->cpu_type, &error_abort); + object_property_set_int(OBJECT(&s->soc[i]), "hartid-base", + base_hartid, &error_abort); + object_property_set_int(OBJECT(&s->soc[i]), "num-harts", + hart_count, &error_abort); + sysbus_realize(SYS_BUS_DEVICE(&s->soc[i]), &error_abort); + + /* Core Local Interruptor (timer and IPI) for each socket */ + sifive_clint_create( + memmap[SPIKE_CLINT].base + i * memmap[SPIKE_CLINT].size, + memmap[SPIKE_CLINT].size, base_hartid, hart_count, + SIFIVE_SIP_BASE, SIFIVE_TIMECMP_BASE, SIFIVE_TIME_BASE, false); + } /* register system main memory (actual RAM) */ memory_region_init_ram(main_mem, NULL, "riscv.spike.ram", @@ -230,21 +295,40 @@ static void spike_board_init(MachineState *machine) fdt_load_addr, s->fdt); /* initialize HTIF using symbols found in load_kernel */ - htif_mm_init(system_memory, mask_rom, &s->soc.harts[0].env, serial_hd(0)); + htif_mm_init(system_memory, mask_rom, + &s->soc[0].harts[0].env, serial_hd(0)); +} - /* Core Local Interruptor (timer and IPI) */ - sifive_clint_create(memmap[SPIKE_CLINT].base, memmap[SPIKE_CLINT].size, - smp_cpus, SIFIVE_SIP_BASE, SIFIVE_TIMECMP_BASE, SIFIVE_TIME_BASE, - false); +static void spike_machine_instance_init(Object *obj) +{ } -static void spike_machine_init(MachineClass *mc) +static void spike_machine_class_init(ObjectClass *oc, void *data) { - mc->desc = "RISC-V 
Spike Board"; + MachineClass *mc = MACHINE_CLASS(oc); + + mc->desc = "RISC-V Spike board"; mc->init = spike_board_init; - mc->max_cpus = 8; + mc->max_cpus = SPIKE_CPUS_MAX; mc->is_default = true; mc->default_cpu_type = SPIKE_V1_10_0_CPU; + mc->possible_cpu_arch_ids = riscv_numa_possible_cpu_arch_ids; + mc->cpu_index_to_instance_props = riscv_numa_cpu_index_to_props; + mc->get_default_cpu_node_id = riscv_numa_get_default_cpu_node_id; + mc->numa_mem_supported = true; +} + +static const TypeInfo spike_machine_typeinfo = { + .name = MACHINE_TYPE_NAME("spike"), + .parent = TYPE_MACHINE, + .class_init = spike_machine_class_init, + .instance_init = spike_machine_instance_init, + .instance_size = sizeof(SpikeState), +}; + +static void spike_machine_init_register_types(void) +{ + type_register_static(&spike_machine_typeinfo); } -DEFINE_MACHINE("spike", spike_machine_init) +type_init(spike_machine_init_register_types) diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index 6e91cf129e..6fca513ec9 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -35,6 +35,7 @@ #include "hw/riscv/sifive_test.h" #include "hw/riscv/virt.h" #include "hw/riscv/boot.h" +#include "hw/riscv/numa.h" #include "chardev/char.h" #include "sysemu/arch_init.h" #include "sysemu/device_tree.h" @@ -58,7 +59,7 @@ static const struct MemmapEntry { [VIRT_RTC] = { 0x101000, 0x1000 }, [VIRT_CLINT] = { 0x2000000, 0x10000 }, [VIRT_PCIE_PIO] = { 0x3000000, 0x10000 }, - [VIRT_PLIC] = { 0xc000000, 0x4000000 }, + [VIRT_PLIC] = { 0xc000000, VIRT_PLIC_SIZE(VIRT_CPUS_MAX * 2) }, [VIRT_UART0] = { 0x10000000, 0x100 }, [VIRT_VIRTIO] = { 0x10001000, 0x1000 }, [VIRT_FLASH] = { 0x20000000, 0x4000000 }, @@ -179,10 +180,17 @@ static void create_fdt(RISCVVirtState *s, const struct MemmapEntry *memmap, uint64_t mem_size, const char *cmdline) { void *fdt; - int cpu, i; - uint32_t *cells; - char *nodename; - uint32_t plic_phandle, test_phandle, phandle = 1; + int i, cpu, socket; + MachineState *mc = MACHINE(s); + uint64_t addr, size; 
+ uint32_t *clint_cells, *plic_cells; + unsigned long clint_addr, plic_addr; + uint32_t plic_phandle[MAX_NODES]; + uint32_t cpu_phandle, intc_phandle, test_phandle; + uint32_t phandle = 1, plic_mmio_phandle = 1; + uint32_t plic_pcie_phandle = 1, plic_virtio_phandle = 1; + char *mem_name, *cpu_name, *core_name, *intc_name; + char *name, *clint_name, *plic_name, *clust_name; hwaddr flashsize = virt_memmap[VIRT_FLASH].size / 2; hwaddr flashbase = virt_memmap[VIRT_FLASH].base; @@ -203,231 +211,238 @@ static void create_fdt(RISCVVirtState *s, const struct MemmapEntry *memmap, qemu_fdt_setprop_cell(fdt, "/soc", "#size-cells", 0x2); qemu_fdt_setprop_cell(fdt, "/soc", "#address-cells", 0x2); - nodename = g_strdup_printf("/memory@%lx", - (long)memmap[VIRT_DRAM].base); - qemu_fdt_add_subnode(fdt, nodename); - qemu_fdt_setprop_cells(fdt, nodename, "reg", - memmap[VIRT_DRAM].base >> 32, memmap[VIRT_DRAM].base, - mem_size >> 32, mem_size); - qemu_fdt_setprop_string(fdt, nodename, "device_type", "memory"); - g_free(nodename); - qemu_fdt_add_subnode(fdt, "/cpus"); qemu_fdt_setprop_cell(fdt, "/cpus", "timebase-frequency", SIFIVE_CLINT_TIMEBASE_FREQ); qemu_fdt_setprop_cell(fdt, "/cpus", "#size-cells", 0x0); qemu_fdt_setprop_cell(fdt, "/cpus", "#address-cells", 0x1); + qemu_fdt_add_subnode(fdt, "/cpus/cpu-map"); + + for (socket = (riscv_socket_count(mc) - 1); socket >= 0; socket--) { + clust_name = g_strdup_printf("/cpus/cpu-map/cluster%d", socket); + qemu_fdt_add_subnode(fdt, clust_name); + + plic_cells = g_new0(uint32_t, s->soc[socket].num_harts * 4); + clint_cells = g_new0(uint32_t, s->soc[socket].num_harts * 4); + + for (cpu = s->soc[socket].num_harts - 1; cpu >= 0; cpu--) { + cpu_phandle = phandle++; - for (cpu = s->soc.num_harts - 1; cpu >= 0; cpu--) { - int cpu_phandle = phandle++; - int intc_phandle; - nodename = g_strdup_printf("/cpus/cpu@%d", cpu); - char *intc = g_strdup_printf("/cpus/cpu@%d/interrupt-controller", cpu); - char *isa = riscv_isa_string(&s->soc.harts[cpu]); 
- qemu_fdt_add_subnode(fdt, nodename); + cpu_name = g_strdup_printf("/cpus/cpu@%d", + s->soc[socket].hartid_base + cpu); + qemu_fdt_add_subnode(fdt, cpu_name); #if defined(TARGET_RISCV32) - qemu_fdt_setprop_string(fdt, nodename, "mmu-type", "riscv,sv32"); + qemu_fdt_setprop_string(fdt, cpu_name, "mmu-type", "riscv,sv32"); #else - qemu_fdt_setprop_string(fdt, nodename, "mmu-type", "riscv,sv48"); + qemu_fdt_setprop_string(fdt, cpu_name, "mmu-type", "riscv,sv48"); #endif - qemu_fdt_setprop_string(fdt, nodename, "riscv,isa", isa); - qemu_fdt_setprop_string(fdt, nodename, "compatible", "riscv"); - qemu_fdt_setprop_string(fdt, nodename, "status", "okay"); - qemu_fdt_setprop_cell(fdt, nodename, "reg", cpu); - qemu_fdt_setprop_string(fdt, nodename, "device_type", "cpu"); - qemu_fdt_setprop_cell(fdt, nodename, "phandle", cpu_phandle); - intc_phandle = phandle++; - qemu_fdt_add_subnode(fdt, intc); - qemu_fdt_setprop_cell(fdt, intc, "phandle", intc_phandle); - qemu_fdt_setprop_string(fdt, intc, "compatible", "riscv,cpu-intc"); - qemu_fdt_setprop(fdt, intc, "interrupt-controller", NULL, 0); - qemu_fdt_setprop_cell(fdt, intc, "#interrupt-cells", 1); - g_free(isa); - g_free(intc); - g_free(nodename); - } + name = riscv_isa_string(&s->soc[socket].harts[cpu]); + qemu_fdt_setprop_string(fdt, cpu_name, "riscv,isa", name); + g_free(name); + qemu_fdt_setprop_string(fdt, cpu_name, "compatible", "riscv"); + qemu_fdt_setprop_string(fdt, cpu_name, "status", "okay"); + qemu_fdt_setprop_cell(fdt, cpu_name, "reg", + s->soc[socket].hartid_base + cpu); + qemu_fdt_setprop_string(fdt, cpu_name, "device_type", "cpu"); + riscv_socket_fdt_write_id(mc, fdt, cpu_name, socket); + qemu_fdt_setprop_cell(fdt, cpu_name, "phandle", cpu_phandle); + + intc_name = g_strdup_printf("%s/interrupt-controller", cpu_name); + qemu_fdt_add_subnode(fdt, intc_name); + intc_phandle = phandle++; + qemu_fdt_setprop_cell(fdt, intc_name, "phandle", intc_phandle); + qemu_fdt_setprop_string(fdt, intc_name, "compatible", + 
"riscv,cpu-intc"); + qemu_fdt_setprop(fdt, intc_name, "interrupt-controller", NULL, 0); + qemu_fdt_setprop_cell(fdt, intc_name, "#interrupt-cells", 1); + + clint_cells[cpu * 4 + 0] = cpu_to_be32(intc_phandle); + clint_cells[cpu * 4 + 1] = cpu_to_be32(IRQ_M_SOFT); + clint_cells[cpu * 4 + 2] = cpu_to_be32(intc_phandle); + clint_cells[cpu * 4 + 3] = cpu_to_be32(IRQ_M_TIMER); + + plic_cells[cpu * 4 + 0] = cpu_to_be32(intc_phandle); + plic_cells[cpu * 4 + 1] = cpu_to_be32(IRQ_M_EXT); + plic_cells[cpu * 4 + 2] = cpu_to_be32(intc_phandle); + plic_cells[cpu * 4 + 3] = cpu_to_be32(IRQ_S_EXT); + + core_name = g_strdup_printf("%s/core%d", clust_name, cpu); + qemu_fdt_add_subnode(fdt, core_name); + qemu_fdt_setprop_cell(fdt, core_name, "cpu", cpu_phandle); + + g_free(core_name); + g_free(intc_name); + g_free(cpu_name); + } - /* Add cpu-topology node */ - qemu_fdt_add_subnode(fdt, "/cpus/cpu-map"); - qemu_fdt_add_subnode(fdt, "/cpus/cpu-map/cluster0"); - for (cpu = s->soc.num_harts - 1; cpu >= 0; cpu--) { - char *core_nodename = g_strdup_printf("/cpus/cpu-map/cluster0/core%d", - cpu); - char *cpu_nodename = g_strdup_printf("/cpus/cpu@%d", cpu); - uint32_t intc_phandle = qemu_fdt_get_phandle(fdt, cpu_nodename); - qemu_fdt_add_subnode(fdt, core_nodename); - qemu_fdt_setprop_cell(fdt, core_nodename, "cpu", intc_phandle); - g_free(core_nodename); - g_free(cpu_nodename); + addr = memmap[VIRT_DRAM].base + riscv_socket_mem_offset(mc, socket); + size = riscv_socket_mem_size(mc, socket); + mem_name = g_strdup_printf("/memory@%lx", (long)addr); + qemu_fdt_add_subnode(fdt, mem_name); + qemu_fdt_setprop_cells(fdt, mem_name, "reg", + addr >> 32, addr, size >> 32, size); + qemu_fdt_setprop_string(fdt, mem_name, "device_type", "memory"); + riscv_socket_fdt_write_id(mc, fdt, mem_name, socket); + g_free(mem_name); + + clint_addr = memmap[VIRT_CLINT].base + + (memmap[VIRT_CLINT].size * socket); + clint_name = g_strdup_printf("/soc/clint@%lx", clint_addr); + qemu_fdt_add_subnode(fdt, clint_name); 
+ qemu_fdt_setprop_string(fdt, clint_name, "compatible", "riscv,clint0"); + qemu_fdt_setprop_cells(fdt, clint_name, "reg", + 0x0, clint_addr, 0x0, memmap[VIRT_CLINT].size); + qemu_fdt_setprop(fdt, clint_name, "interrupts-extended", + clint_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 4); + riscv_socket_fdt_write_id(mc, fdt, clint_name, socket); + g_free(clint_name); + + plic_phandle[socket] = phandle++; + plic_addr = memmap[VIRT_PLIC].base + (memmap[VIRT_PLIC].size * socket); + plic_name = g_strdup_printf("/soc/plic@%lx", plic_addr); + qemu_fdt_add_subnode(fdt, plic_name); + qemu_fdt_setprop_cell(fdt, plic_name, + "#address-cells", FDT_PLIC_ADDR_CELLS); + qemu_fdt_setprop_cell(fdt, plic_name, + "#interrupt-cells", FDT_PLIC_INT_CELLS); + qemu_fdt_setprop_string(fdt, plic_name, "compatible", "riscv,plic0"); + qemu_fdt_setprop(fdt, plic_name, "interrupt-controller", NULL, 0); + qemu_fdt_setprop(fdt, plic_name, "interrupts-extended", + plic_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 4); + qemu_fdt_setprop_cells(fdt, plic_name, "reg", + 0x0, plic_addr, 0x0, memmap[VIRT_PLIC].size); + qemu_fdt_setprop_cell(fdt, plic_name, "riscv,ndev", VIRTIO_NDEV); + riscv_socket_fdt_write_id(mc, fdt, plic_name, socket); + qemu_fdt_setprop_cell(fdt, plic_name, "phandle", plic_phandle[socket]); + g_free(plic_name); + + g_free(clint_cells); + g_free(plic_cells); + g_free(clust_name); } - cells = g_new0(uint32_t, s->soc.num_harts * 4); - for (cpu = 0; cpu < s->soc.num_harts; cpu++) { - nodename = - g_strdup_printf("/cpus/cpu@%d/interrupt-controller", cpu); - uint32_t intc_phandle = qemu_fdt_get_phandle(fdt, nodename); - cells[cpu * 4 + 0] = cpu_to_be32(intc_phandle); - cells[cpu * 4 + 1] = cpu_to_be32(IRQ_M_SOFT); - cells[cpu * 4 + 2] = cpu_to_be32(intc_phandle); - cells[cpu * 4 + 3] = cpu_to_be32(IRQ_M_TIMER); - g_free(nodename); - } - nodename = g_strdup_printf("/soc/clint@%lx", - (long)memmap[VIRT_CLINT].base); - qemu_fdt_add_subnode(fdt, nodename); - 
qemu_fdt_setprop_string(fdt, nodename, "compatible", "riscv,clint0"); - qemu_fdt_setprop_cells(fdt, nodename, "reg", - 0x0, memmap[VIRT_CLINT].base, - 0x0, memmap[VIRT_CLINT].size); - qemu_fdt_setprop(fdt, nodename, "interrupts-extended", - cells, s->soc.num_harts * sizeof(uint32_t) * 4); - g_free(cells); - g_free(nodename); - - plic_phandle = phandle++; - cells = g_new0(uint32_t, s->soc.num_harts * 4); - for (cpu = 0; cpu < s->soc.num_harts; cpu++) { - nodename = - g_strdup_printf("/cpus/cpu@%d/interrupt-controller", cpu); - uint32_t intc_phandle = qemu_fdt_get_phandle(fdt, nodename); - cells[cpu * 4 + 0] = cpu_to_be32(intc_phandle); - cells[cpu * 4 + 1] = cpu_to_be32(IRQ_M_EXT); - cells[cpu * 4 + 2] = cpu_to_be32(intc_phandle); - cells[cpu * 4 + 3] = cpu_to_be32(IRQ_S_EXT); - g_free(nodename); + for (socket = 0; socket < riscv_socket_count(mc); socket++) { + if (socket == 0) { + plic_mmio_phandle = plic_phandle[socket]; + plic_virtio_phandle = plic_phandle[socket]; + plic_pcie_phandle = plic_phandle[socket]; + } + if (socket == 1) { + plic_virtio_phandle = plic_phandle[socket]; + plic_pcie_phandle = plic_phandle[socket]; + } + if (socket == 2) { + plic_pcie_phandle = plic_phandle[socket]; + } } - nodename = g_strdup_printf("/soc/interrupt-controller@%lx", - (long)memmap[VIRT_PLIC].base); - qemu_fdt_add_subnode(fdt, nodename); - qemu_fdt_setprop_cell(fdt, nodename, "#address-cells", - FDT_PLIC_ADDR_CELLS); - qemu_fdt_setprop_cell(fdt, nodename, "#interrupt-cells", - FDT_PLIC_INT_CELLS); - qemu_fdt_setprop_string(fdt, nodename, "compatible", "riscv,plic0"); - qemu_fdt_setprop(fdt, nodename, "interrupt-controller", NULL, 0); - qemu_fdt_setprop(fdt, nodename, "interrupts-extended", - cells, s->soc.num_harts * sizeof(uint32_t) * 4); - qemu_fdt_setprop_cells(fdt, nodename, "reg", - 0x0, memmap[VIRT_PLIC].base, - 0x0, memmap[VIRT_PLIC].size); - qemu_fdt_setprop_cell(fdt, nodename, "riscv,ndev", VIRTIO_NDEV); - qemu_fdt_setprop_cell(fdt, nodename, "phandle", 
plic_phandle); - plic_phandle = qemu_fdt_get_phandle(fdt, nodename); - g_free(cells); - g_free(nodename); + + riscv_socket_fdt_write_distance_matrix(mc, fdt); for (i = 0; i < VIRTIO_COUNT; i++) { - nodename = g_strdup_printf("/virtio_mmio@%lx", + name = g_strdup_printf("/soc/virtio_mmio@%lx", (long)(memmap[VIRT_VIRTIO].base + i * memmap[VIRT_VIRTIO].size)); - qemu_fdt_add_subnode(fdt, nodename); - qemu_fdt_setprop_string(fdt, nodename, "compatible", "virtio,mmio"); - qemu_fdt_setprop_cells(fdt, nodename, "reg", + qemu_fdt_add_subnode(fdt, name); + qemu_fdt_setprop_string(fdt, name, "compatible", "virtio,mmio"); + qemu_fdt_setprop_cells(fdt, name, "reg", 0x0, memmap[VIRT_VIRTIO].base + i * memmap[VIRT_VIRTIO].size, 0x0, memmap[VIRT_VIRTIO].size); - qemu_fdt_setprop_cell(fdt, nodename, "interrupt-parent", plic_phandle); - qemu_fdt_setprop_cell(fdt, nodename, "interrupts", VIRTIO_IRQ + i); - g_free(nodename); + qemu_fdt_setprop_cell(fdt, name, "interrupt-parent", + plic_virtio_phandle); + qemu_fdt_setprop_cell(fdt, name, "interrupts", VIRTIO_IRQ + i); + g_free(name); } - nodename = g_strdup_printf("/soc/pci@%lx", + name = g_strdup_printf("/soc/pci@%lx", (long) memmap[VIRT_PCIE_ECAM].base); - qemu_fdt_add_subnode(fdt, nodename); - qemu_fdt_setprop_cell(fdt, nodename, "#address-cells", - FDT_PCI_ADDR_CELLS); - qemu_fdt_setprop_cell(fdt, nodename, "#interrupt-cells", - FDT_PCI_INT_CELLS); - qemu_fdt_setprop_cell(fdt, nodename, "#size-cells", 0x2); - qemu_fdt_setprop_string(fdt, nodename, "compatible", - "pci-host-ecam-generic"); - qemu_fdt_setprop_string(fdt, nodename, "device_type", "pci"); - qemu_fdt_setprop_cell(fdt, nodename, "linux,pci-domain", 0); - qemu_fdt_setprop_cells(fdt, nodename, "bus-range", 0, - memmap[VIRT_PCIE_ECAM].size / - PCIE_MMCFG_SIZE_MIN - 1); - qemu_fdt_setprop(fdt, nodename, "dma-coherent", NULL, 0); - qemu_fdt_setprop_cells(fdt, nodename, "reg", 0, memmap[VIRT_PCIE_ECAM].base, - 0, memmap[VIRT_PCIE_ECAM].size); - 
qemu_fdt_setprop_sized_cells(fdt, nodename, "ranges", + qemu_fdt_add_subnode(fdt, name); + qemu_fdt_setprop_cell(fdt, name, "#address-cells", FDT_PCI_ADDR_CELLS); + qemu_fdt_setprop_cell(fdt, name, "#interrupt-cells", FDT_PCI_INT_CELLS); + qemu_fdt_setprop_cell(fdt, name, "#size-cells", 0x2); + qemu_fdt_setprop_string(fdt, name, "compatible", "pci-host-ecam-generic"); + qemu_fdt_setprop_string(fdt, name, "device_type", "pci"); + qemu_fdt_setprop_cell(fdt, name, "linux,pci-domain", 0); + qemu_fdt_setprop_cells(fdt, name, "bus-range", 0, + memmap[VIRT_PCIE_ECAM].size / PCIE_MMCFG_SIZE_MIN - 1); + qemu_fdt_setprop(fdt, name, "dma-coherent", NULL, 0); + qemu_fdt_setprop_cells(fdt, name, "reg", 0, + memmap[VIRT_PCIE_ECAM].base, 0, memmap[VIRT_PCIE_ECAM].size); + qemu_fdt_setprop_sized_cells(fdt, name, "ranges", 1, FDT_PCI_RANGE_IOPORT, 2, 0, 2, memmap[VIRT_PCIE_PIO].base, 2, memmap[VIRT_PCIE_PIO].size, 1, FDT_PCI_RANGE_MMIO, 2, memmap[VIRT_PCIE_MMIO].base, 2, memmap[VIRT_PCIE_MMIO].base, 2, memmap[VIRT_PCIE_MMIO].size); - create_pcie_irq_map(fdt, nodename, plic_phandle); - g_free(nodename); + create_pcie_irq_map(fdt, name, plic_pcie_phandle); + g_free(name); test_phandle = phandle++; - nodename = g_strdup_printf("/test@%lx", + name = g_strdup_printf("/soc/test@%lx", (long)memmap[VIRT_TEST].base); - qemu_fdt_add_subnode(fdt, nodename); + qemu_fdt_add_subnode(fdt, name); { const char compat[] = "sifive,test1\0sifive,test0\0syscon"; - qemu_fdt_setprop(fdt, nodename, "compatible", compat, sizeof(compat)); + qemu_fdt_setprop(fdt, name, "compatible", compat, sizeof(compat)); } - qemu_fdt_setprop_cells(fdt, nodename, "reg", + qemu_fdt_setprop_cells(fdt, name, "reg", 0x0, memmap[VIRT_TEST].base, 0x0, memmap[VIRT_TEST].size); - qemu_fdt_setprop_cell(fdt, nodename, "phandle", test_phandle); - test_phandle = qemu_fdt_get_phandle(fdt, nodename); - g_free(nodename); - - nodename = g_strdup_printf("/reboot"); - qemu_fdt_add_subnode(fdt, nodename); - qemu_fdt_setprop_string(fdt, 
nodename, "compatible", "syscon-reboot"); - qemu_fdt_setprop_cell(fdt, nodename, "regmap", test_phandle); - qemu_fdt_setprop_cell(fdt, nodename, "offset", 0x0); - qemu_fdt_setprop_cell(fdt, nodename, "value", FINISHER_RESET); - g_free(nodename); - - nodename = g_strdup_printf("/poweroff"); - qemu_fdt_add_subnode(fdt, nodename); - qemu_fdt_setprop_string(fdt, nodename, "compatible", "syscon-poweroff"); - qemu_fdt_setprop_cell(fdt, nodename, "regmap", test_phandle); - qemu_fdt_setprop_cell(fdt, nodename, "offset", 0x0); - qemu_fdt_setprop_cell(fdt, nodename, "value", FINISHER_PASS); - g_free(nodename); - - nodename = g_strdup_printf("/uart@%lx", - (long)memmap[VIRT_UART0].base); - qemu_fdt_add_subnode(fdt, nodename); - qemu_fdt_setprop_string(fdt, nodename, "compatible", "ns16550a"); - qemu_fdt_setprop_cells(fdt, nodename, "reg", + qemu_fdt_setprop_cell(fdt, name, "phandle", test_phandle); + test_phandle = qemu_fdt_get_phandle(fdt, name); + g_free(name); + + name = g_strdup_printf("/soc/reboot"); + qemu_fdt_add_subnode(fdt, name); + qemu_fdt_setprop_string(fdt, name, "compatible", "syscon-reboot"); + qemu_fdt_setprop_cell(fdt, name, "regmap", test_phandle); + qemu_fdt_setprop_cell(fdt, name, "offset", 0x0); + qemu_fdt_setprop_cell(fdt, name, "value", FINISHER_RESET); + g_free(name); + + name = g_strdup_printf("/soc/poweroff"); + qemu_fdt_add_subnode(fdt, name); + qemu_fdt_setprop_string(fdt, name, "compatible", "syscon-poweroff"); + qemu_fdt_setprop_cell(fdt, name, "regmap", test_phandle); + qemu_fdt_setprop_cell(fdt, name, "offset", 0x0); + qemu_fdt_setprop_cell(fdt, name, "value", FINISHER_PASS); + g_free(name); + + name = g_strdup_printf("/soc/uart@%lx", (long)memmap[VIRT_UART0].base); + qemu_fdt_add_subnode(fdt, name); + qemu_fdt_setprop_string(fdt, name, "compatible", "ns16550a"); + qemu_fdt_setprop_cells(fdt, name, "reg", 0x0, memmap[VIRT_UART0].base, 0x0, memmap[VIRT_UART0].size); - qemu_fdt_setprop_cell(fdt, nodename, "clock-frequency", 3686400); - 
qemu_fdt_setprop_cell(fdt, nodename, "interrupt-parent", plic_phandle); - qemu_fdt_setprop_cell(fdt, nodename, "interrupts", UART0_IRQ); + qemu_fdt_setprop_cell(fdt, name, "clock-frequency", 3686400); + qemu_fdt_setprop_cell(fdt, name, "interrupt-parent", plic_mmio_phandle); + qemu_fdt_setprop_cell(fdt, name, "interrupts", UART0_IRQ); qemu_fdt_add_subnode(fdt, "/chosen"); - qemu_fdt_setprop_string(fdt, "/chosen", "stdout-path", nodename); + qemu_fdt_setprop_string(fdt, "/chosen", "stdout-path", name); if (cmdline) { qemu_fdt_setprop_string(fdt, "/chosen", "bootargs", cmdline); } - g_free(nodename); - - nodename = g_strdup_printf("/rtc@%lx", - (long)memmap[VIRT_RTC].base); - qemu_fdt_add_subnode(fdt, nodename); - qemu_fdt_setprop_string(fdt, nodename, "compatible", - "google,goldfish-rtc"); - qemu_fdt_setprop_cells(fdt, nodename, "reg", + g_free(name); + + name = g_strdup_printf("/soc/rtc@%lx", (long)memmap[VIRT_RTC].base); + qemu_fdt_add_subnode(fdt, name); + qemu_fdt_setprop_string(fdt, name, "compatible", "google,goldfish-rtc"); + qemu_fdt_setprop_cells(fdt, name, "reg", 0x0, memmap[VIRT_RTC].base, 0x0, memmap[VIRT_RTC].size); - qemu_fdt_setprop_cell(fdt, nodename, "interrupt-parent", plic_phandle); - qemu_fdt_setprop_cell(fdt, nodename, "interrupts", RTC_IRQ); - g_free(nodename); - - nodename = g_strdup_printf("/flash@%" PRIx64, flashbase); - qemu_fdt_add_subnode(s->fdt, nodename); - qemu_fdt_setprop_string(s->fdt, nodename, "compatible", "cfi-flash"); - qemu_fdt_setprop_sized_cells(s->fdt, nodename, "reg", + qemu_fdt_setprop_cell(fdt, name, "interrupt-parent", plic_mmio_phandle); + qemu_fdt_setprop_cell(fdt, name, "interrupts", RTC_IRQ); + g_free(name); + + name = g_strdup_printf("/soc/flash@%" PRIx64, flashbase); + qemu_fdt_add_subnode(s->fdt, name); + qemu_fdt_setprop_string(s->fdt, name, "compatible", "cfi-flash"); + qemu_fdt_setprop_sized_cells(s->fdt, name, "reg", 2, flashbase, 2, flashsize, 2, flashbase + flashsize, 2, flashsize); - 
qemu_fdt_setprop_cell(s->fdt, nodename, "bank-width", 4); - g_free(nodename); + qemu_fdt_setprop_cell(s->fdt, name, "bank-width", 4); + g_free(name); } - static inline DeviceState *gpex_pcie_init(MemoryRegion *sys_mem, hwaddr ecam_base, hwaddr ecam_size, hwaddr mmio_base, hwaddr mmio_size, @@ -475,22 +490,101 @@ static void virt_machine_init(MachineState *machine) MemoryRegion *system_memory = get_system_memory(); MemoryRegion *main_mem = g_new(MemoryRegion, 1); MemoryRegion *mask_rom = g_new(MemoryRegion, 1); - char *plic_hart_config; + char *plic_hart_config, *soc_name; size_t plic_hart_config_len; target_ulong start_addr = memmap[VIRT_DRAM].base; uint32_t fdt_load_addr; uint64_t kernel_entry; - int i; - unsigned int smp_cpus = machine->smp.cpus; + DeviceState *mmio_plic, *virtio_plic, *pcie_plic; + int i, j, base_hartid, hart_count; - /* Initialize SOC */ - object_initialize_child(OBJECT(machine), "soc", &s->soc, - TYPE_RISCV_HART_ARRAY); - object_property_set_str(OBJECT(&s->soc), "cpu-type", machine->cpu_type, - &error_abort); - object_property_set_int(OBJECT(&s->soc), "num-harts", smp_cpus, - &error_abort); - sysbus_realize(SYS_BUS_DEVICE(&s->soc), &error_abort); + /* Check socket count limit */ + if (VIRT_SOCKETS_MAX < riscv_socket_count(machine)) { + error_report("number of sockets/nodes should be less than %d", + VIRT_SOCKETS_MAX); + exit(1); + } + + /* Initialize sockets */ + mmio_plic = virtio_plic = pcie_plic = NULL; + for (i = 0; i < riscv_socket_count(machine); i++) { + if (!riscv_socket_check_hartids(machine, i)) { + error_report("discontinuous hartids in socket%d", i); + exit(1); + } + + base_hartid = riscv_socket_first_hartid(machine, i); + if (base_hartid < 0) { + error_report("can't find hartid base for socket%d", i); + exit(1); + } + + hart_count = riscv_socket_hart_count(machine, i); + if (hart_count < 0) { + error_report("can't find hart count for socket%d", i); + exit(1); + } + + soc_name = g_strdup_printf("soc%d", i); + 
object_initialize_child(OBJECT(machine), soc_name, &s->soc[i], + TYPE_RISCV_HART_ARRAY); + g_free(soc_name); + object_property_set_str(OBJECT(&s->soc[i]), "cpu-type", + machine->cpu_type, &error_abort); + object_property_set_int(OBJECT(&s->soc[i]), "hartid-base", + base_hartid, &error_abort); + object_property_set_int(OBJECT(&s->soc[i]), "num-harts", + hart_count, &error_abort); + sysbus_realize(SYS_BUS_DEVICE(&s->soc[i]), &error_abort); + + /* Per-socket CLINT */ + sifive_clint_create( + memmap[VIRT_CLINT].base + i * memmap[VIRT_CLINT].size, + memmap[VIRT_CLINT].size, base_hartid, hart_count, + SIFIVE_SIP_BASE, SIFIVE_TIMECMP_BASE, SIFIVE_TIME_BASE, true); + + /* Per-socket PLIC hart topology configuration string */ + plic_hart_config_len = + (strlen(VIRT_PLIC_HART_CONFIG) + 1) * hart_count; + plic_hart_config = g_malloc0(plic_hart_config_len); + for (j = 0; j < hart_count; j++) { + if (j != 0) { + strncat(plic_hart_config, ",", plic_hart_config_len); + } + strncat(plic_hart_config, VIRT_PLIC_HART_CONFIG, + plic_hart_config_len); + plic_hart_config_len -= (strlen(VIRT_PLIC_HART_CONFIG) + 1); + } + + /* Per-socket PLIC */ + s->plic[i] = sifive_plic_create( + memmap[VIRT_PLIC].base + i * memmap[VIRT_PLIC].size, + plic_hart_config, base_hartid, + VIRT_PLIC_NUM_SOURCES, + VIRT_PLIC_NUM_PRIORITIES, + VIRT_PLIC_PRIORITY_BASE, + VIRT_PLIC_PENDING_BASE, + VIRT_PLIC_ENABLE_BASE, + VIRT_PLIC_ENABLE_STRIDE, + VIRT_PLIC_CONTEXT_BASE, + VIRT_PLIC_CONTEXT_STRIDE, + memmap[VIRT_PLIC].size); + g_free(plic_hart_config); + + /* Try to use different PLIC instance based device type */ + if (i == 0) { + mmio_plic = s->plic[i]; + virtio_plic = s->plic[i]; + pcie_plic = s->plic[i]; + } + if (i == 1) { + virtio_plic = s->plic[i]; + pcie_plic = s->plic[i]; + } + if (i == 2) { + pcie_plic = s->plic[i]; + } + } /* register system main memory (actual RAM) */ memory_region_init_ram(main_mem, NULL, "riscv_virt_board.ram", @@ -547,38 +641,14 @@ static void virt_machine_init(MachineState 
*machine) virt_memmap[VIRT_MROM].size, kernel_entry, fdt_load_addr, s->fdt); - /* create PLIC hart topology configuration string */ - plic_hart_config_len = (strlen(VIRT_PLIC_HART_CONFIG) + 1) * smp_cpus; - plic_hart_config = g_malloc0(plic_hart_config_len); - for (i = 0; i < smp_cpus; i++) { - if (i != 0) { - strncat(plic_hart_config, ",", plic_hart_config_len); - } - strncat(plic_hart_config, VIRT_PLIC_HART_CONFIG, plic_hart_config_len); - plic_hart_config_len -= (strlen(VIRT_PLIC_HART_CONFIG) + 1); - } - - /* MMIO */ - s->plic = sifive_plic_create(memmap[VIRT_PLIC].base, - plic_hart_config, - VIRT_PLIC_NUM_SOURCES, - VIRT_PLIC_NUM_PRIORITIES, - VIRT_PLIC_PRIORITY_BASE, - VIRT_PLIC_PENDING_BASE, - VIRT_PLIC_ENABLE_BASE, - VIRT_PLIC_ENABLE_STRIDE, - VIRT_PLIC_CONTEXT_BASE, - VIRT_PLIC_CONTEXT_STRIDE, - memmap[VIRT_PLIC].size); - sifive_clint_create(memmap[VIRT_CLINT].base, - memmap[VIRT_CLINT].size, smp_cpus, - SIFIVE_SIP_BASE, SIFIVE_TIMECMP_BASE, SIFIVE_TIME_BASE, true); + /* SiFive Test MMIO device */ sifive_test_create(memmap[VIRT_TEST].base); + /* VirtIO MMIO devices */ for (i = 0; i < VIRTIO_COUNT; i++) { sysbus_create_simple("virtio-mmio", memmap[VIRT_VIRTIO].base + i * memmap[VIRT_VIRTIO].size, - qdev_get_gpio_in(DEVICE(s->plic), VIRTIO_IRQ + i)); + qdev_get_gpio_in(DEVICE(virtio_plic), VIRTIO_IRQ + i)); } gpex_pcie_init(system_memory, @@ -587,14 +657,14 @@ static void virt_machine_init(MachineState *machine) memmap[VIRT_PCIE_MMIO].base, memmap[VIRT_PCIE_MMIO].size, memmap[VIRT_PCIE_PIO].base, - DEVICE(s->plic), true); + DEVICE(pcie_plic), true); serial_mm_init(system_memory, memmap[VIRT_UART0].base, - 0, qdev_get_gpio_in(DEVICE(s->plic), UART0_IRQ), 399193, + 0, qdev_get_gpio_in(DEVICE(mmio_plic), UART0_IRQ), 399193, serial_hd(0), DEVICE_LITTLE_ENDIAN); sysbus_create_simple("goldfish_rtc", memmap[VIRT_RTC].base, - qdev_get_gpio_in(DEVICE(s->plic), RTC_IRQ)); + qdev_get_gpio_in(DEVICE(mmio_plic), RTC_IRQ)); virt_flash_create(s); @@ -604,8 +674,6 @@ static 
void virt_machine_init(MachineState *machine) drive_get(IF_PFLASH, 0, i)); } virt_flash_map(s, system_memory); - - g_free(plic_hart_config); } static void virt_machine_instance_init(Object *obj) @@ -618,9 +686,13 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) mc->desc = "RISC-V VirtIO board"; mc->init = virt_machine_init; - mc->max_cpus = 8; + mc->max_cpus = VIRT_CPUS_MAX; mc->default_cpu_type = VIRT_CPU; mc->pci_allow_0_address = true; + mc->possible_cpu_arch_ids = riscv_numa_possible_cpu_arch_ids; + mc->cpu_index_to_instance_props = riscv_numa_cpu_index_to_props; + mc->get_default_cpu_node_id = riscv_numa_get_default_cpu_node_id; + mc->numa_mem_supported = true; } static const TypeInfo virt_machine_typeinfo = { diff --git a/hw/s390x/virtio-ccw-input.c b/hw/s390x/virtio-ccw-input.c index 5601e25dee..83136fbba1 100644 --- a/hw/s390x/virtio-ccw-input.c +++ b/hw/s390x/virtio-ccw-input.c @@ -1,5 +1,5 @@ /* - * virtio ccw scsi implementation + * virtio ccw input implementation * * Copyright 2012, 2015 IBM Corp. 
* diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c index 13b05af29b..a83ffeefc8 100644 --- a/hw/scsi/vhost-scsi.c +++ b/hw/scsi/vhost-scsi.c @@ -270,7 +270,8 @@ static Property vhost_scsi_properties[] = { DEFINE_PROP_STRING("vhostfd", VirtIOSCSICommon, conf.vhostfd), DEFINE_PROP_STRING("wwpn", VirtIOSCSICommon, conf.wwpn), DEFINE_PROP_UINT32("boot_tpgt", VirtIOSCSICommon, conf.boot_tpgt, 0), - DEFINE_PROP_UINT32("num_queues", VirtIOSCSICommon, conf.num_queues, 1), + DEFINE_PROP_UINT32("num_queues", VirtIOSCSICommon, conf.num_queues, + VIRTIO_SCSI_AUTO_NUM_QUEUES), DEFINE_PROP_UINT32("virtqueue_size", VirtIOSCSICommon, conf.virtqueue_size, 128), DEFINE_PROP_BOOL("seg_max_adjust", VirtIOSCSICommon, conf.seg_max_adjust, diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c index f2e524438a..7c0631656c 100644 --- a/hw/scsi/vhost-user-scsi.c +++ b/hw/scsi/vhost-user-scsi.c @@ -114,7 +114,7 @@ static void vhost_user_scsi_realize(DeviceState *dev, Error **errp) goto free_virtio; } - vsc->dev.nvqs = 2 + vs->conf.num_queues; + vsc->dev.nvqs = VIRTIO_SCSI_VQ_NUM_FIXED + vs->conf.num_queues; vsc->dev.vqs = g_new0(struct vhost_virtqueue, vsc->dev.nvqs); vsc->dev.vq_index = 0; vsc->dev.backend_features = 0; @@ -162,7 +162,8 @@ static void vhost_user_scsi_unrealize(DeviceState *dev) static Property vhost_user_scsi_properties[] = { DEFINE_PROP_CHR("chardev", VirtIOSCSICommon, conf.chardev), DEFINE_PROP_UINT32("boot_tpgt", VirtIOSCSICommon, conf.boot_tpgt, 0), - DEFINE_PROP_UINT32("num_queues", VirtIOSCSICommon, conf.num_queues, 1), + DEFINE_PROP_UINT32("num_queues", VirtIOSCSICommon, conf.num_queues, + VIRTIO_SCSI_AUTO_NUM_QUEUES), DEFINE_PROP_UINT32("virtqueue_size", VirtIOSCSICommon, conf.virtqueue_size, 128), DEFINE_PROP_UINT32("max_sectors", VirtIOSCSICommon, conf.max_sectors, diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index b49775269e..3a71ea7097 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -191,7 +191,7 @@ static 
void virtio_scsi_save_request(QEMUFile *f, SCSIRequest *sreq) VirtIOSCSIReq *req = sreq->hba_private; VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(req->dev); VirtIODevice *vdev = VIRTIO_DEVICE(req->dev); - uint32_t n = virtio_get_queue_index(req->vq) - 2; + uint32_t n = virtio_get_queue_index(req->vq) - VIRTIO_SCSI_VQ_NUM_FIXED; assert(n < vs->conf.num_queues); qemu_put_be32s(f, &n); @@ -891,11 +891,15 @@ void virtio_scsi_common_realize(DeviceState *dev, virtio_init(vdev, "virtio-scsi", VIRTIO_ID_SCSI, sizeof(VirtIOSCSIConfig)); + if (s->conf.num_queues == VIRTIO_SCSI_AUTO_NUM_QUEUES) { + s->conf.num_queues = 1; + } if (s->conf.num_queues == 0 || - s->conf.num_queues > VIRTIO_QUEUE_MAX - 2) { + s->conf.num_queues > VIRTIO_QUEUE_MAX - VIRTIO_SCSI_VQ_NUM_FIXED) { error_setg(errp, "Invalid number of queues (= %" PRIu32 "), " "must be a positive integer less than %d.", - s->conf.num_queues, VIRTIO_QUEUE_MAX - 2); + s->conf.num_queues, + VIRTIO_QUEUE_MAX - VIRTIO_SCSI_VQ_NUM_FIXED); virtio_cleanup(vdev); return; } @@ -963,7 +967,8 @@ static void virtio_scsi_device_unrealize(DeviceState *dev) } static Property virtio_scsi_properties[] = { - DEFINE_PROP_UINT32("num_queues", VirtIOSCSI, parent_obj.conf.num_queues, 1), + DEFINE_PROP_UINT32("num_queues", VirtIOSCSI, parent_obj.conf.num_queues, + VIRTIO_SCSI_AUTO_NUM_QUEUES), DEFINE_PROP_UINT32("virtqueue_size", VirtIOSCSI, parent_obj.conf.virtqueue_size, 256), DEFINE_PROP_BOOL("seg_max_adjust", VirtIOSCSI, diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c index f560826904..7cc950b41c 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c @@ -92,9 +92,21 @@ static struct { const char *manufacturer, *version, *serial, *asset, *sku; } type3; +/* + * SVVP requires max_speed and current_speed to be set and not being + * 0 which counts as unknown (SMBIOS 3.1.0/Table 21). Set the + * default value to 2000MHz as we did before. 
+ */ +#define DEFAULT_CPU_SPEED 2000 + static struct { const char *sock_pfx, *manufacturer, *version, *serial, *asset, *part; -} type4; + uint64_t max_speed; + uint64_t current_speed; +} type4 = { + .max_speed = DEFAULT_CPU_SPEED, + .current_speed = DEFAULT_CPU_SPEED +}; static struct { size_t nvalues; @@ -273,6 +285,14 @@ static const QemuOptDesc qemu_smbios_type4_opts[] = { .type = QEMU_OPT_STRING, .help = "version number", },{ + .name = "max-speed", + .type = QEMU_OPT_NUMBER, + .help = "max speed in MHz", + },{ + .name = "current-speed", + .type = QEMU_OPT_NUMBER, + .help = "speed at system boot in MHz", + },{ .name = "serial", .type = QEMU_OPT_STRING, .help = "serial number", @@ -586,9 +606,8 @@ static void smbios_build_type_4_table(MachineState *ms, unsigned instance) SMBIOS_TABLE_SET_STR(4, processor_version_str, type4.version); t->voltage = 0; t->external_clock = cpu_to_le16(0); /* Unknown */ - /* SVVP requires max_speed and current_speed to not be unknown. */ - t->max_speed = cpu_to_le16(2000); /* 2000 MHz */ - t->current_speed = cpu_to_le16(2000); /* 2000 MHz */ + t->max_speed = cpu_to_le16(type4.max_speed); + t->current_speed = cpu_to_le16(type4.current_speed); t->status = 0x41; /* Socket populated, CPU enabled */ t->processor_upgrade = 0x01; /* Other */ t->l1_cache_handle = cpu_to_le16(0xFFFF); /* N/A */ @@ -1116,6 +1135,15 @@ void smbios_entry_add(QemuOpts *opts, Error **errp) save_opt(&type4.serial, opts, "serial"); save_opt(&type4.asset, opts, "asset"); save_opt(&type4.part, opts, "part"); + type4.max_speed = qemu_opt_get_number(opts, "max-speed", + DEFAULT_CPU_SPEED); + type4.current_speed = qemu_opt_get_number(opts, "current-speed", + DEFAULT_CPU_SPEED); + if (type4.max_speed > UINT16_MAX || + type4.current_speed > UINT16_MAX) { + error_setg(errp, "SMBIOS CPU speed is too large (> %d)", + UINT16_MAX); + } return; case 11: if (!qemu_opts_validate(opts, qemu_smbios_type11_opts, errp)) { diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c index 
b9330a8e6f..cec6fe1599 100644 --- a/hw/vfio/ap.c +++ b/hw/vfio/ap.c @@ -71,6 +71,7 @@ static VFIOGroup *vfio_ap_get_group(VFIOAPDevice *vapdev, Error **errp) if (!group_path) { error_setg(errp, "%s: no iommu_group found for %s: %s", VFIO_AP_DEVICE_TYPE, vapdev->vdev.sysfsdev, gerror->message); + g_error_free(gerror); return NULL; } diff --git a/hw/virtio/vhost-scsi-pci.c b/hw/virtio/vhost-scsi-pci.c index 095af23f3f..a6bb0dc60d 100644 --- a/hw/virtio/vhost-scsi-pci.c +++ b/hw/virtio/vhost-scsi-pci.c @@ -47,10 +47,15 @@ static void vhost_scsi_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) { VHostSCSIPCI *dev = VHOST_SCSI_PCI(vpci_dev); DeviceState *vdev = DEVICE(&dev->vdev); - VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(vdev); + VirtIOSCSIConf *conf = &dev->vdev.parent_obj.parent_obj.conf; + + if (conf->num_queues == VIRTIO_SCSI_AUTO_NUM_QUEUES) { + conf->num_queues = + virtio_pci_optimal_num_queues(VIRTIO_SCSI_VQ_NUM_FIXED); + } if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) { - vpci_dev->nvectors = vs->conf.num_queues + 3; + vpci_dev->nvectors = conf->num_queues + VIRTIO_SCSI_VQ_NUM_FIXED + 1; } qdev_realize(vdev, BUS(&vpci_dev->bus), errp); diff --git a/hw/virtio/vhost-user-blk-pci.c b/hw/virtio/vhost-user-blk-pci.c index 4f5d5cbf44..a62a71e067 100644 --- a/hw/virtio/vhost-user-blk-pci.c +++ b/hw/virtio/vhost-user-blk-pci.c @@ -54,6 +54,10 @@ static void vhost_user_blk_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) VHostUserBlkPCI *dev = VHOST_USER_BLK_PCI(vpci_dev); DeviceState *vdev = DEVICE(&dev->vdev); + if (dev->vdev.num_queues == VHOST_USER_BLK_AUTO_NUM_QUEUES) { + dev->vdev.num_queues = virtio_pci_optimal_num_queues(0); + } + if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) { vpci_dev->nvectors = dev->vdev.num_queues + 1; } diff --git a/hw/virtio/vhost-user-scsi-pci.c b/hw/virtio/vhost-user-scsi-pci.c index 4705cd54e8..25e97ca54e 100644 --- a/hw/virtio/vhost-user-scsi-pci.c +++ b/hw/virtio/vhost-user-scsi-pci.c @@ -53,10 +53,15 @@ static 
void vhost_user_scsi_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) { VHostUserSCSIPCI *dev = VHOST_USER_SCSI_PCI(vpci_dev); DeviceState *vdev = DEVICE(&dev->vdev); - VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(vdev); + VirtIOSCSIConf *conf = &dev->vdev.parent_obj.parent_obj.conf; + + if (conf->num_queues == VIRTIO_SCSI_AUTO_NUM_QUEUES) { + conf->num_queues = + virtio_pci_optimal_num_queues(VIRTIO_SCSI_VQ_NUM_FIXED); + } if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) { - vpci_dev->nvectors = vs->conf.num_queues + 3; + vpci_dev->nvectors = conf->num_queues + VIRTIO_SCSI_VQ_NUM_FIXED + 1; } qdev_realize(vdev, BUS(&vpci_dev->bus), errp); diff --git a/hw/virtio/virtio-blk-pci.c b/hw/virtio/virtio-blk-pci.c index 849cc7dfd8..37c6e0aeb4 100644 --- a/hw/virtio/virtio-blk-pci.c +++ b/hw/virtio/virtio-blk-pci.c @@ -50,9 +50,14 @@ static void virtio_blk_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) { VirtIOBlkPCI *dev = VIRTIO_BLK_PCI(vpci_dev); DeviceState *vdev = DEVICE(&dev->vdev); + VirtIOBlkConf *conf = &dev->vdev.conf; + + if (conf->num_queues == VIRTIO_BLK_AUTO_NUM_QUEUES) { + conf->num_queues = virtio_pci_optimal_num_queues(0); + } if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) { - vpci_dev->nvectors = dev->vdev.conf.num_queues + 1; + vpci_dev->nvectors = conf->num_queues + 1; } qdev_realize(vdev, BUS(&vpci_dev->bus), errp); diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index ccdf54e81c..fc69570dcc 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -19,6 +19,7 @@ #include "exec/memop.h" #include "standard-headers/linux/virtio_pci.h" +#include "hw/boards.h" #include "hw/virtio/virtio.h" #include "migration/qemu-file-types.h" #include "hw/pci/pci.h" @@ -2058,6 +2059,37 @@ void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t) g_free(base_name); } +unsigned virtio_pci_optimal_num_queues(unsigned fixed_queues) +{ + /* + * 1:1 vq to vCPU mapping is ideal because the same vCPU that submitted + * virtqueue buffers 
can handle their completion. When a different vCPU + * handles completion it may need to IPI the vCPU that submitted the + * request and this adds overhead. + * + * Virtqueues consume guest RAM and MSI-X vectors. This is wasteful in + * guests with very many vCPUs and a device that is only used by a few + * vCPUs. Unfortunately optimizing that case requires manual pinning inside + * the guest, so those users might as well manually set the number of + * queues. There is no upper limit that can be applied automatically and + * doing so arbitrarily would result in a sudden performance drop once the + * threshold number of vCPUs is exceeded. + */ + unsigned num_queues = current_machine->smp.cpus; + + /* + * The maximum number of MSI-X vectors is PCI_MSIX_FLAGS_QSIZE + 1, but the + * config change interrupt and the fixed virtqueues must be taken into + * account too. + */ + num_queues = MIN(num_queues, PCI_MSIX_FLAGS_QSIZE - fixed_queues); + + /* + * There is a limit to how many virtqueues a device can have. + */ + return MIN(num_queues, VIRTIO_QUEUE_MAX - fixed_queues); +} + /* virtio-pci-bus */ static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size, diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h index e2eaaa9182..91096f0291 100644 --- a/hw/virtio/virtio-pci.h +++ b/hw/virtio/virtio-pci.h @@ -243,4 +243,13 @@ typedef struct VirtioPCIDeviceTypeInfo { /* Register virtio-pci type(s). @t must be static. */ void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t); +/** + * virtio_pci_optimal_num_queues: + * @fixed_queues: number of queues that are always present + * + * Returns: The optimal number of queues for a multi-queue device, excluding + * @fixed_queues. 
+ */ +unsigned virtio_pci_optimal_num_queues(unsigned fixed_queues); + #endif diff --git a/hw/virtio/virtio-scsi-pci.c b/hw/virtio/virtio-scsi-pci.c index c23a134202..fa4b3bfb50 100644 --- a/hw/virtio/virtio-scsi-pci.c +++ b/hw/virtio/virtio-scsi-pci.c @@ -46,12 +46,17 @@ static void virtio_scsi_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) { VirtIOSCSIPCI *dev = VIRTIO_SCSI_PCI(vpci_dev); DeviceState *vdev = DEVICE(&dev->vdev); - VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(vdev); DeviceState *proxy = DEVICE(vpci_dev); + VirtIOSCSIConf *conf = &dev->vdev.parent_obj.conf; char *bus_name; + if (conf->num_queues == VIRTIO_SCSI_AUTO_NUM_QUEUES) { + conf->num_queues = + virtio_pci_optimal_num_queues(VIRTIO_SCSI_VQ_NUM_FIXED); + } + if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) { - vpci_dev->nvectors = vs->conf.num_queues + 3; + vpci_dev->nvectors = conf->num_queues + VIRTIO_SCSI_VQ_NUM_FIXED + 1; } /* diff --git a/include/block/block_int.h b/include/block/block_int.h index 38dec0275b..9da7a42927 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -59,6 +59,7 @@ #define BLOCK_OPT_DATA_FILE "data_file" #define BLOCK_OPT_DATA_FILE_RAW "data_file_raw" #define BLOCK_OPT_COMPRESSION_TYPE "compression_type" +#define BLOCK_OPT_EXTL2 "extended_l2" #define BLOCK_PROBE_BUF_SIZE 512 diff --git a/include/hw/acpi/pcihp.h b/include/hw/acpi/pcihp.h index 8bc4a4c01d..02f4665767 100644 --- a/include/hw/acpi/pcihp.h +++ b/include/hw/acpi/pcihp.h @@ -67,7 +67,7 @@ void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev, Error **errp); /* Called on reset */ -void acpi_pcihp_reset(AcpiPciHpState *s); +void acpi_pcihp_reset(AcpiPciHpState *s, bool acpihp_root_off); extern const VMStateDescription vmstate_acpi_pcihp_pci_status; diff --git a/include/hw/riscv/numa.h b/include/hw/riscv/numa.h new file mode 100644 index 0000000000..fcce942cee --- /dev/null +++ b/include/hw/riscv/numa.h @@ -0,0 +1,113 @@ +/* + * QEMU RISC-V NUMA Helper + * + * Copyright 
(c) 2020 Western Digital Corporation or its affiliates. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef RISCV_NUMA_H +#define RISCV_NUMA_H + +#include "hw/sysbus.h" +#include "sysemu/numa.h" + +/** + * riscv_socket_count: + * @ms: pointer to machine state + * + * Returns: number of sockets for a numa system and 1 for a non-numa system + */ +int riscv_socket_count(const MachineState *ms); + +/** + * riscv_socket_first_hartid: + * @ms: pointer to machine state + * @socket_id: socket index + * + * Returns: first hartid for a valid socket and -1 for an invalid socket + */ +int riscv_socket_first_hartid(const MachineState *ms, int socket_id); + +/** + * riscv_socket_last_hartid: + * @ms: pointer to machine state + * @socket_id: socket index + * + * Returns: last hartid for a valid socket and -1 for an invalid socket + */ +int riscv_socket_last_hartid(const MachineState *ms, int socket_id); + +/** + * riscv_socket_hart_count: + * @ms: pointer to machine state + * @socket_id: socket index + * + * Returns: number of harts for a valid socket and -1 for an invalid socket + */ +int riscv_socket_hart_count(const MachineState *ms, int socket_id); + +/** + * riscv_socket_mem_offset: + * @ms: pointer to machine state + * @socket_id: socket index + * + * Returns: offset of ram belonging to given socket + */ +uint64_t riscv_socket_mem_offset(const MachineState *ms, int socket_id); + +/** + * 
riscv_socket_mem_size: + * @ms: pointer to machine state + * @socket_id: socket index + * + * Returns: size of ram belonging to given socket + */ +uint64_t riscv_socket_mem_size(const MachineState *ms, int socket_id); + +/** + * riscv_socket_check_hartids: + * @ms: pointer to machine state + * @socket_id: socket index + * + * Returns: true if hartids belonging to given socket are contiguous else false + */ +bool riscv_socket_check_hartids(const MachineState *ms, int socket_id); + +/** + * riscv_socket_fdt_write_id: + * @ms: pointer to machine state + * @socket_id: socket index + * + * Write NUMA node-id FDT property for given FDT node + */ +void riscv_socket_fdt_write_id(const MachineState *ms, void *fdt, + const char *node_name, int socket_id); + +/** + * riscv_socket_fdt_write_distance_matrix: + * @ms: pointer to machine state + * @fdt: device tree blob to write into + * + * Write NUMA distance matrix in FDT for given machine + */ +void riscv_socket_fdt_write_distance_matrix(const MachineState *ms, void *fdt); + +CpuInstanceProperties +riscv_numa_cpu_index_to_props(MachineState *ms, unsigned cpu_index); + +int64_t riscv_numa_get_default_cpu_node_id(const MachineState *ms, int idx); + +const CPUArchIdList *riscv_numa_possible_cpu_arch_ids(MachineState *ms); + +#endif /* RISCV_NUMA_H */ diff --git a/include/hw/riscv/sifive_clint.h b/include/hw/riscv/sifive_clint.h index 4a720bfece..9f5fb3d31d 100644 --- a/include/hw/riscv/sifive_clint.h +++ b/include/hw/riscv/sifive_clint.h @@ -33,6 +33,7 @@ typedef struct SiFiveCLINTState { /*< public >*/ MemoryRegion mmio; + uint32_t hartid_base; uint32_t num_harts; uint32_t sip_base; uint32_t timecmp_base; @@ -40,9 +41,9 @@ typedef struct SiFiveCLINTState { uint32_t aperture_size; } SiFiveCLINTState; -DeviceState *sifive_clint_create(hwaddr addr, hwaddr size, uint32_t num_harts, - uint32_t sip_base, uint32_t timecmp_base, uint32_t time_base, - bool provide_rdtime); +DeviceState *sifive_clint_create(hwaddr addr, hwaddr size, + uint32_t 
hartid_base, uint32_t num_harts, uint32_t sip_base, + uint32_t timecmp_base, uint32_t time_base, bool provide_rdtime); enum { SIFIVE_SIP_BASE = 0x0, diff --git a/include/hw/riscv/sifive_plic.h b/include/hw/riscv/sifive_plic.h index 4421e81249..ace76d0f1b 100644 --- a/include/hw/riscv/sifive_plic.h +++ b/include/hw/riscv/sifive_plic.h @@ -48,6 +48,7 @@ typedef struct SiFivePLICState { /*< public >*/ MemoryRegion mmio; uint32_t num_addrs; + uint32_t num_harts; uint32_t bitfield_words; PLICAddr *addr_config; uint32_t *source_priority; @@ -58,6 +59,7 @@ typedef struct SiFivePLICState { /* config */ char *hart_config; + uint32_t hartid_base; uint32_t num_sources; uint32_t num_priorities; uint32_t priority_base; @@ -70,10 +72,10 @@ typedef struct SiFivePLICState { } SiFivePLICState; DeviceState *sifive_plic_create(hwaddr addr, char *hart_config, - uint32_t num_sources, uint32_t num_priorities, - uint32_t priority_base, uint32_t pending_base, - uint32_t enable_base, uint32_t enable_stride, - uint32_t context_base, uint32_t context_stride, - uint32_t aperture_size); + uint32_t hartid_base, uint32_t num_sources, + uint32_t num_priorities, uint32_t priority_base, + uint32_t pending_base, uint32_t enable_base, + uint32_t enable_stride, uint32_t context_base, + uint32_t context_stride, uint32_t aperture_size); #endif diff --git a/include/hw/riscv/spike.h b/include/hw/riscv/spike.h index 1cd72b85d6..b0a18a9c94 100644 --- a/include/hw/riscv/spike.h +++ b/include/hw/riscv/spike.h @@ -22,12 +22,19 @@ #include "hw/riscv/riscv_hart.h" #include "hw/sysbus.h" +#define SPIKE_CPUS_MAX 8 +#define SPIKE_SOCKETS_MAX 8 + +#define TYPE_SPIKE_MACHINE MACHINE_TYPE_NAME("spike") +#define SPIKE_MACHINE(obj) \ + OBJECT_CHECK(SpikeState, (obj), TYPE_SPIKE_MACHINE) + typedef struct { /*< private >*/ - SysBusDevice parent_obj; + MachineState parent; /*< public >*/ - RISCVHartArrayState soc; + RISCVHartArrayState soc[SPIKE_SOCKETS_MAX]; void *fdt; int fdt_size; } SpikeState; diff --git 
a/include/hw/riscv/virt.h b/include/hw/riscv/virt.h index e69355efaf..1beacd7666 100644 --- a/include/hw/riscv/virt.h +++ b/include/hw/riscv/virt.h @@ -23,6 +23,9 @@ #include "hw/sysbus.h" #include "hw/block/flash.h" +#define VIRT_CPUS_MAX 8 +#define VIRT_SOCKETS_MAX 8 + #define TYPE_RISCV_VIRT_MACHINE MACHINE_TYPE_NAME("virt") #define RISCV_VIRT_MACHINE(obj) \ OBJECT_CHECK(RISCVVirtState, (obj), TYPE_RISCV_VIRT_MACHINE) @@ -32,8 +35,8 @@ typedef struct { MachineState parent; /*< public >*/ - RISCVHartArrayState soc; - DeviceState *plic; + RISCVHartArrayState soc[VIRT_SOCKETS_MAX]; + DeviceState *plic[VIRT_SOCKETS_MAX]; PFlashCFI01 *flash[2]; void *fdt; @@ -74,6 +77,8 @@ enum { #define VIRT_PLIC_ENABLE_STRIDE 0x80 #define VIRT_PLIC_CONTEXT_BASE 0x200000 #define VIRT_PLIC_CONTEXT_STRIDE 0x1000 +#define VIRT_PLIC_SIZE(__num_context) \ + (VIRT_PLIC_CONTEXT_BASE + (__num_context) * VIRT_PLIC_CONTEXT_STRIDE) #define FDT_PCI_ADDR_CELLS 3 #define FDT_PCI_INT_CELLS 1 diff --git a/include/hw/virtio/vhost-user-blk.h b/include/hw/virtio/vhost-user-blk.h index 34ad6f0c0e..292d17147c 100644 --- a/include/hw/virtio/vhost-user-blk.h +++ b/include/hw/virtio/vhost-user-blk.h @@ -25,6 +25,8 @@ #define VHOST_USER_BLK(obj) \ OBJECT_CHECK(VHostUserBlk, (obj), TYPE_VHOST_USER_BLK) +#define VHOST_USER_BLK_AUTO_NUM_QUEUES UINT16_MAX + typedef struct VHostUserBlk { VirtIODevice parent_obj; CharBackend chardev; diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h index b1334c3904..7539c2b848 100644 --- a/include/hw/virtio/virtio-blk.h +++ b/include/hw/virtio/virtio-blk.h @@ -30,6 +30,8 @@ struct virtio_blk_inhdr unsigned char status; }; +#define VIRTIO_BLK_AUTO_NUM_QUEUES UINT16_MAX + struct VirtIOBlkConf { BlockConf conf; diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h index 24e768909d..c0b8e4dd7e 100644 --- a/include/hw/virtio/virtio-scsi.h +++ b/include/hw/virtio/virtio-scsi.h @@ -36,6 +36,11 @@ #define VIRTIO_SCSI_MAX_TARGET 255 
#define VIRTIO_SCSI_MAX_LUN 16383 +/* Number of virtqueues that are always present */ +#define VIRTIO_SCSI_VQ_NUM_FIXED 2 + +#define VIRTIO_SCSI_AUTO_NUM_QUEUES UINT32_MAX + typedef struct virtio_scsi_cmd_req VirtIOSCSICmdReq; typedef struct virtio_scsi_cmd_resp VirtIOSCSICmdResp; typedef struct virtio_scsi_ctrl_tmf_req VirtIOSCSICtrlTMFReq; diff --git a/pc-bios/hppa-firmware.img b/pc-bios/hppa-firmware.img index 82d98b1353..f0f8d0e164 100644 --- a/pc-bios/hppa-firmware.img +++ b/pc-bios/hppa-firmware.img Binary files differdiff --git a/qapi/block-core.json b/qapi/block-core.json index 197bdc1c36..db08c58d78 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -67,6 +67,9 @@ # standalone (read-only) raw image without looking at qcow2 # metadata (since: 4.0) # +# @extended-l2: true if the image has extended L2 entries; only valid for +# compat >= 1.1 (since 5.2) +# # @lazy-refcounts: on or off; only valid for compat >= 1.1 # # @corrupt: true if the image has been marked corrupt; only valid for @@ -88,6 +91,7 @@ 'compat': 'str', '*data-file': 'str', '*data-file-raw': 'bool', + '*extended-l2': 'bool', '*lazy-refcounts': 'bool', '*corrupt': 'bool', 'refcount-bits': 'int', @@ -4304,6 +4308,8 @@ # @data-file-raw: True if the external data file must stay valid as a # standalone (read-only) raw image without looking at qcow2 # metadata (default: false; since: 4.0) +# @extended-l2 True to make the image have extended L2 entries +# (default: false; since 5.2) # @size: Size of the virtual disk in bytes # @version: Compatibility level (default: v3) # @backing-file: File name of the backing file if a backing file @@ -4324,6 +4330,7 @@ 'data': { 'file': 'BlockdevRef', '*data-file': 'BlockdevRef', '*data-file-raw': 'bool', + '*extended-l2': 'bool', 'size': 'size', '*version': 'BlockdevQcow2Version', '*backing-file': 'str', diff --git a/qemu-options.hx b/qemu-options.hx index 708583b4ce..30019c4eca 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -2294,7 +2294,7 @@ 
DEF("smbios", HAS_ARG, QEMU_OPTION_smbios, " [,sku=str]\n" " specify SMBIOS type 3 fields\n" "-smbios type=4[,sock_pfx=str][,manufacturer=str][,version=str][,serial=str]\n" - " [,asset=str][,part=str]\n" + " [,asset=str][,part=str][,max-speed=%d][,current-speed=%d]\n" " specify SMBIOS type 4 fields\n" "-smbios type=17[,loc_pfx=str][,bank=str][,manufacturer=str][,serial=str]\n" " [,asset=str][,part=str][,speed=%d]\n" diff --git a/roms/seabios-hppa b/roms/seabios-hppa -Subproject 1630ac7d65c4a09218cc677f1fa56cd5b314044 +Subproject 4ff7639e2b86d5775fa7d5cd0dbfa4d3a385a70 diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h index a804a5d0ba..383808bf88 100644 --- a/target/riscv/cpu.h +++ b/target/riscv/cpu.h @@ -321,6 +321,8 @@ bool riscv_cpu_virt_enabled(CPURISCVState *env); void riscv_cpu_set_virt_enabled(CPURISCVState *env, bool enable); bool riscv_cpu_force_hs_excep_enabled(CPURISCVState *env); void riscv_cpu_set_force_hs_excep(CPURISCVState *env, bool enable); +bool riscv_cpu_two_stage_lookup(CPURISCVState *env); +void riscv_cpu_set_two_stage_lookup(CPURISCVState *env, bool enable); int riscv_cpu_mmu_index(CPURISCVState *env, bool ifetch); hwaddr riscv_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr); void riscv_cpu_do_unaligned_access(CPUState *cs, vaddr addr, diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h index 8117e8b5a7..bd36062877 100644 --- a/target/riscv/cpu_bits.h +++ b/target/riscv/cpu_bits.h @@ -197,9 +197,12 @@ #define CSR_HIDELEG 0x603 #define CSR_HIE 0x604 #define CSR_HCOUNTEREN 0x606 +#define CSR_HGEIE 0x607 #define CSR_HTVAL 0x643 +#define CSR_HVIP 0x645 #define CSR_HIP 0x644 #define CSR_HTINST 0x64A +#define CSR_HGEIP 0xE12 #define CSR_HGATP 0x680 #define CSR_HTIMEDELTA 0x605 #define CSR_HTIMEDELTAH 0x615 @@ -379,10 +382,10 @@ #define MSTATUS_TW 0x20000000 /* since: priv-1.10 */ #define MSTATUS_TSR 0x40000000 /* since: priv-1.10 */ #if defined(TARGET_RISCV64) -#define MSTATUS_MTL 0x4000000000ULL +#define MSTATUS_GVA 
0x4000000000ULL #define MSTATUS_MPV 0x8000000000ULL #elif defined(TARGET_RISCV32) -#define MSTATUS_MTL 0x00000040 +#define MSTATUS_GVA 0x00000040 #define MSTATUS_MPV 0x00000080 #endif @@ -437,12 +440,17 @@ #endif /* hstatus CSR bits */ -#define HSTATUS_SPRV 0x00000001 +#define HSTATUS_VSBE 0x00000020 +#define HSTATUS_GVA 0x00000040 #define HSTATUS_SPV 0x00000080 -#define HSTATUS_SP2P 0x00000100 -#define HSTATUS_SP2V 0x00000200 +#define HSTATUS_SPVP 0x00000100 +#define HSTATUS_HU 0x00000200 +#define HSTATUS_VGEIN 0x0003F000 #define HSTATUS_VTVM 0x00100000 #define HSTATUS_VTSR 0x00400000 +#if defined(TARGET_RISCV64) +#define HSTATUS_VSXL 0x300000000 +#endif #define HSTATUS32_WPRI 0xFF8FF87E #define HSTATUS64_WPRI 0xFFFFFFFFFF8FF87EULL @@ -453,6 +461,11 @@ #define HSTATUS_WPRI HSTATUS64_WPRI #endif +#define HCOUNTEREN_CY (1 << 0) +#define HCOUNTEREN_TM (1 << 1) +#define HCOUNTEREN_IR (1 << 2) +#define HCOUNTEREN_HPM3 (1 << 3) + /* Privilege modes */ #define PRV_U 0 #define PRV_S 1 @@ -467,6 +480,7 @@ * page table fault. 
*/ #define FORCE_HS_EXCEP 2 +#define HS_TWO_STAGE 4 /* RV32 satp CSR field masks */ #define SATP32_MODE 0x80000000 @@ -544,6 +558,7 @@ #define RISCV_EXCP_STORE_PAGE_FAULT 0xf /* since: priv-1.10.0 */ #define RISCV_EXCP_INST_GUEST_PAGE_FAULT 0x14 #define RISCV_EXCP_LOAD_GUEST_ACCESS_FAULT 0x15 +#define RISCV_EXCP_VIRT_INSTRUCTION_FAULT 0x16 #define RISCV_EXCP_STORE_GUEST_AMO_ACCESS_FAULT 0x17 #define RISCV_EXCP_INT_FLAG 0x80000000 diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c index fd1d373b6f..dc7ae3e7b1 100644 --- a/target/riscv/cpu_helper.c +++ b/target/riscv/cpu_helper.c @@ -220,6 +220,24 @@ void riscv_cpu_set_force_hs_excep(CPURISCVState *env, bool enable) env->virt = set_field(env->virt, FORCE_HS_EXCEP, enable); } +bool riscv_cpu_two_stage_lookup(CPURISCVState *env) +{ + if (!riscv_has_ext(env, RVH)) { + return false; + } + + return get_field(env->virt, HS_TWO_STAGE); +} + +void riscv_cpu_set_two_stage_lookup(CPURISCVState *env, bool enable) +{ + if (!riscv_has_ext(env, RVH)) { + return; + } + + env->virt = set_field(env->virt, HS_TWO_STAGE, enable); +} + int riscv_cpu_claim_interrupts(RISCVCPU *cpu, uint32_t interrupts) { CPURISCVState *env = &cpu->env; @@ -322,22 +340,13 @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical, * was called. Background registers will be used if the guest has * forced a two stage translation to be on (in HS or M mode). 
*/ + if (riscv_cpu_two_stage_lookup(env) && access_type != MMU_INST_FETCH) { + use_background = true; + } + if (mode == PRV_M && access_type != MMU_INST_FETCH) { if (get_field(env->mstatus, MSTATUS_MPRV)) { mode = get_field(env->mstatus, MSTATUS_MPP); - - if (riscv_has_ext(env, RVH) && - MSTATUS_MPV_ISSET(env)) { - use_background = true; - } - } - } - - if (mode == PRV_S && access_type != MMU_INST_FETCH && - riscv_has_ext(env, RVH) && !riscv_cpu_virt_enabled(env)) { - if (get_field(env->hstatus, HSTATUS_SPRV)) { - mode = get_field(env->mstatus, SSTATUS_SPP); - use_background = true; } } @@ -590,7 +599,8 @@ static void raise_mmu_exception(CPURISCVState *env, target_ulong address, } break; case MMU_DATA_LOAD: - if (riscv_cpu_virt_enabled(env) && !first_stage) { + if ((riscv_cpu_virt_enabled(env) || riscv_cpu_two_stage_lookup(env)) && + !first_stage) { cs->exception_index = RISCV_EXCP_LOAD_GUEST_ACCESS_FAULT; } else { cs->exception_index = page_fault_exceptions ? @@ -598,7 +608,8 @@ static void raise_mmu_exception(CPURISCVState *env, target_ulong address, } break; case MMU_DATA_STORE: - if (riscv_cpu_virt_enabled(env) && !first_stage) { + if ((riscv_cpu_virt_enabled(env) || riscv_cpu_two_stage_lookup(env)) && + !first_stage) { cs->exception_index = RISCV_EXCP_STORE_GUEST_AMO_ACCESS_FAULT; } else { cs->exception_index = page_fault_exceptions ? @@ -688,8 +699,6 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int size, hwaddr pa = 0; int prot, prot2; bool pmp_violation = false; - bool m_mode_two_stage = false; - bool hs_mode_two_stage = false; bool first_stage_error = true; int ret = TRANSLATE_FAIL; int mode = mmu_idx; @@ -700,30 +709,21 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int size, qemu_log_mask(CPU_LOG_MMU, "%s ad %" VADDR_PRIx " rw %d mmu_idx %d\n", __func__, address, access_type, mmu_idx); - /* - * Determine if we are in M mode and MPRV is set or in HS mode and SPRV is - * set and we want to access a virtulisation address. 
- */ - if (riscv_has_ext(env, RVH)) { - m_mode_two_stage = env->priv == PRV_M && - access_type != MMU_INST_FETCH && - get_field(env->mstatus, MSTATUS_MPRV) && - MSTATUS_MPV_ISSET(env); - - hs_mode_two_stage = env->priv == PRV_S && - !riscv_cpu_virt_enabled(env) && - access_type != MMU_INST_FETCH && - get_field(env->hstatus, HSTATUS_SPRV) && - get_field(env->hstatus, HSTATUS_SPV); - } - if (mode == PRV_M && access_type != MMU_INST_FETCH) { if (get_field(env->mstatus, MSTATUS_MPRV)) { mode = get_field(env->mstatus, MSTATUS_MPP); } } - if (riscv_cpu_virt_enabled(env) || m_mode_two_stage || hs_mode_two_stage) { + if (riscv_has_ext(env, RVH) && env->priv == PRV_M && + access_type != MMU_INST_FETCH && + get_field(env->mstatus, MSTATUS_MPRV) && + MSTATUS_MPV_ISSET(env)) { + riscv_cpu_set_two_stage_lookup(env, true); + } + + if (riscv_cpu_virt_enabled(env) || + (riscv_cpu_two_stage_lookup(env) && access_type != MMU_INST_FETCH)) { /* Two stage lookup */ ret = get_physical_address(env, &pa, &prot, address, access_type, mmu_idx, true, true); @@ -775,6 +775,14 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int size, __func__, address, ret, pa, prot); } + /* We did the two stage lookup based on MPRV, unset the lookup */ + if (riscv_has_ext(env, RVH) && env->priv == PRV_M && + access_type != MMU_INST_FETCH && + get_field(env->mstatus, MSTATUS_MPRV) && + MSTATUS_MPV_ISSET(env)) { + riscv_cpu_set_two_stage_lookup(env, false); + } + if (riscv_feature(env, RISCV_FEATURE_PMP) && (ret == TRANSLATE_SUCCESS) && !pmp_hart_has_privs(env, pa, size, 1 << access_type, mode)) { @@ -893,22 +901,35 @@ void riscv_cpu_do_interrupt(CPUState *cs) if (riscv_has_ext(env, RVH)) { target_ulong hdeleg = async ? env->hideleg : env->hedeleg; + if ((riscv_cpu_virt_enabled(env) || + riscv_cpu_two_stage_lookup(env)) && tval) { + /* + * If we are writing a guest virtual address to stval, set + * this to 1. If we are trapping to VS we will set this to 0 + * later. 
+ */ + env->hstatus = set_field(env->hstatus, HSTATUS_GVA, 1); + } else { + /* For other HS-mode traps, we set this to 0. */ + env->hstatus = set_field(env->hstatus, HSTATUS_GVA, 0); + } + if (riscv_cpu_virt_enabled(env) && ((hdeleg >> cause) & 1) && !force_hs_execp) { + /* Trap to VS mode */ /* * See if we need to adjust cause. Yes if its VS mode interrupt * no if hypervisor has delegated one of hs mode's interrupt */ if (cause == IRQ_VS_TIMER || cause == IRQ_VS_SOFT || - cause == IRQ_VS_EXT) + cause == IRQ_VS_EXT) { cause = cause - 1; - /* Trap to VS mode */ + } + env->hstatus = set_field(env->hstatus, HSTATUS_GVA, 0); } else if (riscv_cpu_virt_enabled(env)) { /* Trap into HS mode, from virt */ riscv_cpu_swap_hypervisor_regs(env); - env->hstatus = set_field(env->hstatus, HSTATUS_SP2V, - get_field(env->hstatus, HSTATUS_SPV)); - env->hstatus = set_field(env->hstatus, HSTATUS_SP2P, + env->hstatus = set_field(env->hstatus, HSTATUS_SPVP, get_field(env->mstatus, SSTATUS_SPP)); env->hstatus = set_field(env->hstatus, HSTATUS_SPV, riscv_cpu_virt_enabled(env)); @@ -919,13 +940,11 @@ void riscv_cpu_do_interrupt(CPUState *cs) riscv_cpu_set_force_hs_excep(env, 0); } else { /* Trap into HS mode */ - env->hstatus = set_field(env->hstatus, HSTATUS_SP2V, - get_field(env->hstatus, HSTATUS_SPV)); - env->hstatus = set_field(env->hstatus, HSTATUS_SP2P, - get_field(env->mstatus, SSTATUS_SPP)); - env->hstatus = set_field(env->hstatus, HSTATUS_SPV, - riscv_cpu_virt_enabled(env)); - + if (!riscv_cpu_two_stage_lookup(env)) { + env->hstatus = set_field(env->hstatus, HSTATUS_SPV, + riscv_cpu_virt_enabled(env)); + } + riscv_cpu_set_two_stage_lookup(env, false); htval = env->guest_phys_fault_addr; } } @@ -951,13 +970,15 @@ void riscv_cpu_do_interrupt(CPUState *cs) #ifdef TARGET_RISCV32 env->mstatush = set_field(env->mstatush, MSTATUS_MPV, riscv_cpu_virt_enabled(env)); - env->mstatush = set_field(env->mstatush, MSTATUS_MTL, - riscv_cpu_force_hs_excep_enabled(env)); + if 
(riscv_cpu_virt_enabled(env) && tval) { + env->mstatush = set_field(env->mstatush, MSTATUS_GVA, 1); + } #else env->mstatus = set_field(env->mstatus, MSTATUS_MPV, riscv_cpu_virt_enabled(env)); - env->mstatus = set_field(env->mstatus, MSTATUS_MTL, - riscv_cpu_force_hs_excep_enabled(env)); + if (riscv_cpu_virt_enabled(env) && tval) { + env->mstatus = set_field(env->mstatus, MSTATUS_GVA, 1); + } #endif mtval2 = env->guest_phys_fault_addr; diff --git a/target/riscv/csr.c b/target/riscv/csr.c index 6a96a01b1c..200001de74 100644 --- a/target/riscv/csr.c +++ b/target/riscv/csr.c @@ -51,7 +51,7 @@ static int fs(CPURISCVState *env, int csrno) return 0; } if (!env->debugger && !riscv_cpu_fp_enabled(env)) { - return -1; + return -RISCV_EXCP_ILLEGAL_INST; } #endif return 0; @@ -73,7 +73,62 @@ static int ctr(CPURISCVState *env, int csrno) if (!cpu->cfg.ext_counters) { /* The Counters extensions is not enabled */ - return -1; + return -RISCV_EXCP_ILLEGAL_INST; + } + + if (riscv_cpu_virt_enabled(env)) { + switch (csrno) { + case CSR_CYCLE: + if (!get_field(env->hcounteren, HCOUNTEREN_CY) && + get_field(env->mcounteren, HCOUNTEREN_CY)) { + return -RISCV_EXCP_VIRT_INSTRUCTION_FAULT; + } + break; + case CSR_TIME: + if (!get_field(env->hcounteren, HCOUNTEREN_TM) && + get_field(env->mcounteren, HCOUNTEREN_TM)) { + return -RISCV_EXCP_VIRT_INSTRUCTION_FAULT; + } + break; + case CSR_INSTRET: + if (!get_field(env->hcounteren, HCOUNTEREN_IR) && + get_field(env->mcounteren, HCOUNTEREN_IR)) { + return -RISCV_EXCP_VIRT_INSTRUCTION_FAULT; + } + break; + case CSR_HPMCOUNTER3...CSR_HPMCOUNTER31: + if (!get_field(env->hcounteren, 1 << (csrno - CSR_HPMCOUNTER3)) && + get_field(env->mcounteren, 1 << (csrno - CSR_HPMCOUNTER3))) { + return -RISCV_EXCP_VIRT_INSTRUCTION_FAULT; + } + break; +#if defined(TARGET_RISCV32) + case CSR_CYCLEH: + if (!get_field(env->hcounteren, HCOUNTEREN_CY) && + get_field(env->mcounteren, HCOUNTEREN_CY)) { + return -RISCV_EXCP_VIRT_INSTRUCTION_FAULT; + } + break; + case 
CSR_TIMEH: + if (!get_field(env->hcounteren, HCOUNTEREN_TM) && + get_field(env->mcounteren, HCOUNTEREN_TM)) { + return -RISCV_EXCP_VIRT_INSTRUCTION_FAULT; + } + break; + case CSR_INSTRETH: + if (!get_field(env->hcounteren, HCOUNTEREN_IR) && + get_field(env->mcounteren, HCOUNTEREN_IR)) { + return -RISCV_EXCP_VIRT_INSTRUCTION_FAULT; + } + break; + case CSR_HPMCOUNTER3H...CSR_HPMCOUNTER31H: + if (!get_field(env->hcounteren, 1 << (csrno - CSR_HPMCOUNTER3H)) && + get_field(env->mcounteren, 1 << (csrno - CSR_HPMCOUNTER3H))) { + return -RISCV_EXCP_VIRT_INSTRUCTION_FAULT; + } + break; +#endif + } } #endif return 0; @@ -98,10 +153,12 @@ static int hmode(CPURISCVState *env, int csrno) if ((env->priv == PRV_S && !riscv_cpu_virt_enabled(env)) || env->priv == PRV_M) { return 0; + } else { + return -RISCV_EXCP_VIRT_INSTRUCTION_FAULT; } } - return -1; + return -RISCV_EXCP_ILLEGAL_INST; } static int pmp(CPURISCVState *env, int csrno) @@ -115,7 +172,7 @@ static int read_fflags(CPURISCVState *env, int csrno, target_ulong *val) { #if !defined(CONFIG_USER_ONLY) if (!env->debugger && !riscv_cpu_fp_enabled(env)) { - return -1; + return -RISCV_EXCP_ILLEGAL_INST; } #endif *val = riscv_cpu_get_fflags(env); @@ -126,7 +183,7 @@ static int write_fflags(CPURISCVState *env, int csrno, target_ulong val) { #if !defined(CONFIG_USER_ONLY) if (!env->debugger && !riscv_cpu_fp_enabled(env)) { - return -1; + return -RISCV_EXCP_ILLEGAL_INST; } env->mstatus |= MSTATUS_FS; #endif @@ -138,7 +195,7 @@ static int read_frm(CPURISCVState *env, int csrno, target_ulong *val) { #if !defined(CONFIG_USER_ONLY) if (!env->debugger && !riscv_cpu_fp_enabled(env)) { - return -1; + return -RISCV_EXCP_ILLEGAL_INST; } #endif *val = env->frm; @@ -149,7 +206,7 @@ static int write_frm(CPURISCVState *env, int csrno, target_ulong val) { #if !defined(CONFIG_USER_ONLY) if (!env->debugger && !riscv_cpu_fp_enabled(env)) { - return -1; + return -RISCV_EXCP_ILLEGAL_INST; } env->mstatus |= MSTATUS_FS; #endif @@ -161,7 +218,7 @@ static 
int read_fcsr(CPURISCVState *env, int csrno, target_ulong *val) { #if !defined(CONFIG_USER_ONLY) if (!env->debugger && !riscv_cpu_fp_enabled(env)) { - return -1; + return -RISCV_EXCP_ILLEGAL_INST; } #endif *val = (riscv_cpu_get_fflags(env) << FSR_AEXC_SHIFT) @@ -177,7 +234,7 @@ static int write_fcsr(CPURISCVState *env, int csrno, target_ulong val) { #if !defined(CONFIG_USER_ONLY) if (!env->debugger && !riscv_cpu_fp_enabled(env)) { - return -1; + return -RISCV_EXCP_ILLEGAL_INST; } env->mstatus |= MSTATUS_FS; #endif @@ -291,7 +348,7 @@ static int read_time(CPURISCVState *env, int csrno, target_ulong *val) uint64_t delta = riscv_cpu_virt_enabled(env) ? env->htimedelta : 0; if (!env->rdtime_fn) { - return -1; + return -RISCV_EXCP_ILLEGAL_INST; } *val = env->rdtime_fn() + delta; @@ -304,7 +361,7 @@ static int read_timeh(CPURISCVState *env, int csrno, target_ulong *val) uint64_t delta = riscv_cpu_virt_enabled(env) ? env->htimedelta : 0; if (!env->rdtime_fn) { - return -1; + return -RISCV_EXCP_ILLEGAL_INST; } *val = (env->rdtime_fn() + delta) >> 32; @@ -340,6 +397,7 @@ static const target_ulong delegable_excps = (1ULL << (RISCV_EXCP_STORE_PAGE_FAULT)) | (1ULL << (RISCV_EXCP_INST_GUEST_PAGE_FAULT)) | (1ULL << (RISCV_EXCP_LOAD_GUEST_ACCESS_FAULT)) | + (1ULL << (RISCV_EXCP_VIRT_INSTRUCTION_FAULT)) | (1ULL << (RISCV_EXCP_STORE_GUEST_AMO_ACCESS_FAULT)); static const target_ulong sstatus_v1_10_mask = SSTATUS_SIE | SSTATUS_SPIE | SSTATUS_UIE | SSTATUS_UPIE | SSTATUS_SPP | SSTATUS_FS | SSTATUS_XS | @@ -403,10 +461,10 @@ static int write_mstatus(CPURISCVState *env, int csrno, target_ulong val) MSTATUS_TW; #if defined(TARGET_RISCV64) /* - * RV32: MPV and MTL are not in mstatus. The current plan is to + * RV32: MPV and GVA are not in mstatus. The current plan is to * add them to mstatush. For now, we just don't support it. 
*/ - mask |= MSTATUS_MTL | MSTATUS_MPV; + mask |= MSTATUS_MPV | MSTATUS_GVA; #endif mstatus = (mstatus & ~mask) | (val & mask); @@ -432,7 +490,7 @@ static int write_mstatush(CPURISCVState *env, int csrno, target_ulong val) tlb_flush(env_cpu(env)); } - val &= MSTATUS_MPV | MSTATUS_MTL; + val &= MSTATUS_MPV | MSTATUS_GVA; env->mstatush = val; @@ -570,7 +628,7 @@ static int write_mcounteren(CPURISCVState *env, int csrno, target_ulong val) static int read_mscounteren(CPURISCVState *env, int csrno, target_ulong *val) { if (env->priv_ver < PRIV_VERSION_1_11_0) { - return -1; + return -RISCV_EXCP_ILLEGAL_INST; } *val = env->mcounteren; return 0; @@ -580,7 +638,7 @@ static int read_mscounteren(CPURISCVState *env, int csrno, target_ulong *val) static int write_mscounteren(CPURISCVState *env, int csrno, target_ulong val) { if (env->priv_ver < PRIV_VERSION_1_11_0) { - return -1; + return -RISCV_EXCP_ILLEGAL_INST; } env->mcounteren = val; return 0; @@ -804,7 +862,7 @@ static int read_satp(CPURISCVState *env, int csrno, target_ulong *val) } if (env->priv == PRV_S && get_field(env->mstatus, MSTATUS_TVM)) { - return -1; + return -RISCV_EXCP_ILLEGAL_INST; } else { *val = env->satp; } @@ -821,7 +879,7 @@ static int write_satp(CPURISCVState *env, int csrno, target_ulong val) ((val ^ env->satp) & (SATP_MODE | SATP_ASID | SATP_PPN))) { if (env->priv == PRV_S && get_field(env->mstatus, MSTATUS_TVM)) { - return -1; + return -RISCV_EXCP_ILLEGAL_INST; } else { if((val ^ env->satp) & SATP_ASID) { tlb_flush(env_cpu(env)); @@ -836,12 +894,26 @@ static int write_satp(CPURISCVState *env, int csrno, target_ulong val) static int read_hstatus(CPURISCVState *env, int csrno, target_ulong *val) { *val = env->hstatus; +#ifdef TARGET_RISCV64 + /* We only support 64-bit VSXL */ + *val = set_field(*val, HSTATUS_VSXL, 2); +#endif + /* We only support little endian */ + *val = set_field(*val, HSTATUS_VSBE, 0); return 0; } static int write_hstatus(CPURISCVState *env, int csrno, target_ulong val) { 
env->hstatus = val; +#ifdef TARGET_RISCV64 + if (get_field(val, HSTATUS_VSXL) != 2) { + qemu_log_mask(LOG_UNIMP, "QEMU does not support mixed HSXLEN options."); + } +#endif + if (get_field(val, HSTATUS_VSBE) != 0) { + qemu_log_mask(LOG_UNIMP, "QEMU does not support big endian guests."); + } return 0; } @@ -869,12 +941,25 @@ static int write_hideleg(CPURISCVState *env, int csrno, target_ulong val) return 0; } +static int rmw_hvip(CPURISCVState *env, int csrno, target_ulong *ret_value, + target_ulong new_value, target_ulong write_mask) +{ + int ret = rmw_mip(env, 0, ret_value, new_value, + write_mask & hip_writable_mask); + + *ret_value &= hip_writable_mask; + + return ret; +} + static int rmw_hip(CPURISCVState *env, int csrno, target_ulong *ret_value, target_ulong new_value, target_ulong write_mask) { int ret = rmw_mip(env, 0, ret_value, new_value, write_mask & hip_writable_mask); + *ret_value &= hip_writable_mask; + return ret; } @@ -902,6 +987,18 @@ static int write_hcounteren(CPURISCVState *env, int csrno, target_ulong val) return 0; } +static int read_hgeie(CPURISCVState *env, int csrno, target_ulong *val) +{ + qemu_log_mask(LOG_UNIMP, "No support for a non-zero GEILEN."); + return 0; +} + +static int write_hgeie(CPURISCVState *env, int csrno, target_ulong val) +{ + qemu_log_mask(LOG_UNIMP, "No support for a non-zero GEILEN."); + return 0; +} + static int read_htval(CPURISCVState *env, int csrno, target_ulong *val) { *val = env->htval; @@ -922,7 +1019,18 @@ static int read_htinst(CPURISCVState *env, int csrno, target_ulong *val) static int write_htinst(CPURISCVState *env, int csrno, target_ulong val) { - env->htinst = val; + return 0; +} + +static int read_hgeip(CPURISCVState *env, int csrno, target_ulong *val) +{ + qemu_log_mask(LOG_UNIMP, "No support for a non-zero GEILEN."); + return 0; +} + +static int write_hgeip(CPURISCVState *env, int csrno, target_ulong val) +{ + qemu_log_mask(LOG_UNIMP, "No support for a non-zero GEILEN."); return 0; } @@ -941,7 +1049,7 
@@ static int write_hgatp(CPURISCVState *env, int csrno, target_ulong val) static int read_htimedelta(CPURISCVState *env, int csrno, target_ulong *val) { if (!env->rdtime_fn) { - return -1; + return -RISCV_EXCP_ILLEGAL_INST; } #if defined(TARGET_RISCV32) @@ -955,7 +1063,7 @@ static int read_htimedelta(CPURISCVState *env, int csrno, target_ulong *val) static int write_htimedelta(CPURISCVState *env, int csrno, target_ulong val) { if (!env->rdtime_fn) { - return -1; + return -RISCV_EXCP_ILLEGAL_INST; } #if defined(TARGET_RISCV32) @@ -970,7 +1078,7 @@ static int write_htimedelta(CPURISCVState *env, int csrno, target_ulong val) static int read_htimedeltah(CPURISCVState *env, int csrno, target_ulong *val) { if (!env->rdtime_fn) { - return -1; + return -RISCV_EXCP_ILLEGAL_INST; } *val = env->htimedelta >> 32; @@ -980,7 +1088,7 @@ static int read_htimedeltah(CPURISCVState *env, int csrno, target_ulong *val) static int write_htimedeltah(CPURISCVState *env, int csrno, target_ulong val) { if (!env->rdtime_fn) { - return -1; + return -RISCV_EXCP_ILLEGAL_INST; } env->htimedelta = deposit64(env->htimedelta, 32, 32, (uint64_t)val); @@ -1178,18 +1286,22 @@ int riscv_csrrw(CPURISCVState *env, int csrno, target_ulong *ret_value, if ((write_mask && read_only) || (!env->debugger && (effective_priv < get_field(csrno, 0x300)))) { - return -1; + return -RISCV_EXCP_ILLEGAL_INST; } #endif /* ensure the CSR extension is enabled. 
*/ if (!cpu->cfg.ext_icsr) { - return -1; + return -RISCV_EXCP_ILLEGAL_INST; } /* check predicate */ - if (!csr_ops[csrno].predicate || csr_ops[csrno].predicate(env, csrno) < 0) { - return -1; + if (!csr_ops[csrno].predicate) { + return -RISCV_EXCP_ILLEGAL_INST; + } + ret = csr_ops[csrno].predicate(env, csrno); + if (ret < 0) { + return ret; } /* execute combined read/write operation if it exists */ @@ -1199,7 +1311,7 @@ int riscv_csrrw(CPURISCVState *env, int csrno, target_ulong *ret_value, /* if no accessor exists then return failure */ if (!csr_ops[csrno].read) { - return -1; + return -RISCV_EXCP_ILLEGAL_INST; } /* read old value */ @@ -1328,11 +1440,14 @@ static riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = { [CSR_HSTATUS] = { hmode, read_hstatus, write_hstatus }, [CSR_HEDELEG] = { hmode, read_hedeleg, write_hedeleg }, [CSR_HIDELEG] = { hmode, read_hideleg, write_hideleg }, + [CSR_HVIP] = { hmode, NULL, NULL, rmw_hvip }, [CSR_HIP] = { hmode, NULL, NULL, rmw_hip }, [CSR_HIE] = { hmode, read_hie, write_hie }, [CSR_HCOUNTEREN] = { hmode, read_hcounteren, write_hcounteren }, + [CSR_HGEIE] = { hmode, read_hgeie, write_hgeie }, [CSR_HTVAL] = { hmode, read_htval, write_htval }, [CSR_HTINST] = { hmode, read_htinst, write_htinst }, + [CSR_HGEIP] = { hmode, read_hgeip, write_hgeip }, [CSR_HGATP] = { hmode, read_hgatp, write_hgatp }, [CSR_HTIMEDELTA] = { hmode, read_htimedelta, write_htimedelta }, #if defined(TARGET_RISCV32) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index acc298219d..4b690147fb 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -80,6 +80,10 @@ DEF_HELPER_1(tlb_flush, void, env) /* Hypervisor functions */ #ifndef CONFIG_USER_ONLY DEF_HELPER_1(hyp_tlb_flush, void, env) +DEF_HELPER_1(hyp_gvma_tlb_flush, void, env) +DEF_HELPER_4(hyp_load, tl, env, tl, tl, tl) +DEF_HELPER_5(hyp_store, void, env, tl, tl, tl, tl) +DEF_HELPER_4(hyp_x_load, tl, env, tl, tl, tl) #endif /* Vector functions */ diff --git 
a/target/riscv/insn32-64.decode b/target/riscv/insn32-64.decode index 86153d93fa..8157dee8b7 100644 --- a/target/riscv/insn32-64.decode +++ b/target/riscv/insn32-64.decode @@ -81,3 +81,8 @@ fmv_x_d 1110001 00000 ..... 000 ..... 1010011 @r2 fcvt_d_l 1101001 00010 ..... ... ..... 1010011 @r2_rm fcvt_d_lu 1101001 00011 ..... ... ..... 1010011 @r2_rm fmv_d_x 1111001 00000 ..... 000 ..... 1010011 @r2 + +# *** RV32H Base Instruction Set *** +hlv_wu 0110100 00001 ..... 100 ..... 1110011 @r2 +hlv_d 0110110 00000 ..... 100 ..... 1110011 @r2 +hsv_d 0110111 ..... ..... 100 00000 1110011 @r2_s diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index bdd8563067..84080dd18c 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -78,6 +78,7 @@ @r_vm_0 ...... . ..... ..... ... ..... ....... &rmrr vm=0 %rs2 %rs1 %rd @r_wdvm ..... wd:1 vm:1 ..... ..... ... ..... ....... &rwdvm %rs2 %rs1 %rd @r2_zimm . zimm:11 ..... ... ..... ....... %rs1 %rd +@r2_s ....... ..... ..... ... ..... ....... %rs2 %rs1 @hfence_gvma ....... ..... ..... ... ..... ....... %rs2 %rs1 @hfence_vvma ....... ..... ..... ... ..... ....... %rs2 %rs1 @@ -223,6 +224,16 @@ fcvt_d_w 1101001 00000 ..... ... ..... 1010011 @r2_rm fcvt_d_wu 1101001 00001 ..... ... ..... 1010011 @r2_rm # *** RV32H Base Instruction Set *** +hlv_b 0110000 00000 ..... 100 ..... 1110011 @r2 +hlv_bu 0110000 00001 ..... 100 ..... 1110011 @r2 +hlv_h 0110010 00000 ..... 100 ..... 1110011 @r2 +hlv_hu 0110010 00001 ..... 100 ..... 1110011 @r2 +hlvx_hu 0110010 00011 ..... 100 ..... 1110011 @r2 +hlv_w 0110100 00000 ..... 100 ..... 1110011 @r2 +hlvx_wu 0110100 00011 ..... 100 ..... 1110011 @r2 +hsv_b 0110001 ..... ..... 100 00000 1110011 @r2_s +hsv_h 0110011 ..... ..... 100 00000 1110011 @r2_s +hsv_w 0110101 ..... ..... 100 00000 1110011 @r2_s hfence_gvma 0110001 ..... ..... 000 00000 1110011 @hfence_gvma hfence_vvma 0010001 ..... ..... 
000 00000 1110011 @hfence_vvma diff --git a/target/riscv/insn_trans/trans_rvh.c.inc b/target/riscv/insn_trans/trans_rvh.c.inc index 263b652d90..881c9ef4d2 100644 --- a/target/riscv/insn_trans/trans_rvh.c.inc +++ b/target/riscv/insn_trans/trans_rvh.c.inc @@ -16,11 +16,351 @@ * this program. If not, see <http://www.gnu.org/licenses/>. */ +static bool trans_hlv_b(DisasContext *ctx, arg_hlv_b *a) +{ + REQUIRE_EXT(ctx, RVH); +#ifndef CONFIG_USER_ONLY + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv mem_idx = tcg_temp_new(); + TCGv memop = tcg_temp_new(); + + gen_get_gpr(t0, a->rs1); + tcg_gen_movi_tl(mem_idx, ctx->mem_idx); + tcg_gen_movi_tl(memop, MO_SB); + + gen_helper_hyp_load(t1, cpu_env, t0, mem_idx, memop); + gen_set_gpr(a->rd, t1); + + tcg_temp_free(t0); + tcg_temp_free(t1); + tcg_temp_free(mem_idx); + tcg_temp_free(memop); + return true; +#else + return false; +#endif +} + +static bool trans_hlv_h(DisasContext *ctx, arg_hlv_h *a) +{ + REQUIRE_EXT(ctx, RVH); +#ifndef CONFIG_USER_ONLY + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv mem_idx = tcg_temp_new(); + TCGv memop = tcg_temp_new(); + + gen_get_gpr(t0, a->rs1); + tcg_gen_movi_tl(mem_idx, ctx->mem_idx); + tcg_gen_movi_tl(memop, MO_TESW); + + gen_helper_hyp_load(t1, cpu_env, t0, mem_idx, memop); + gen_set_gpr(a->rd, t1); + + tcg_temp_free(t0); + tcg_temp_free(t1); + tcg_temp_free(mem_idx); + tcg_temp_free(memop); + return true; +#else + return false; +#endif +} + +static bool trans_hlv_w(DisasContext *ctx, arg_hlv_w *a) +{ + REQUIRE_EXT(ctx, RVH); +#ifndef CONFIG_USER_ONLY + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv mem_idx = tcg_temp_new(); + TCGv memop = tcg_temp_new(); + + gen_get_gpr(t0, a->rs1); + tcg_gen_movi_tl(mem_idx, ctx->mem_idx); + tcg_gen_movi_tl(memop, MO_TESL); + + gen_helper_hyp_load(t1, cpu_env, t0, mem_idx, memop); + gen_set_gpr(a->rd, t1); + + tcg_temp_free(t0); + tcg_temp_free(t1); + tcg_temp_free(mem_idx); + tcg_temp_free(memop); + return 
true; +#else + return false; +#endif +} + +static bool trans_hlv_bu(DisasContext *ctx, arg_hlv_bu *a) +{ + REQUIRE_EXT(ctx, RVH); +#ifndef CONFIG_USER_ONLY + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv mem_idx = tcg_temp_new(); + TCGv memop = tcg_temp_new(); + + gen_get_gpr(t0, a->rs1); + tcg_gen_movi_tl(mem_idx, ctx->mem_idx); + tcg_gen_movi_tl(memop, MO_UB); + + gen_helper_hyp_load(t1, cpu_env, t0, mem_idx, memop); + gen_set_gpr(a->rd, t1); + + tcg_temp_free(t0); + tcg_temp_free(t1); + tcg_temp_free(mem_idx); + tcg_temp_free(memop); + return true; +#else + return false; +#endif +} + +static bool trans_hlv_hu(DisasContext *ctx, arg_hlv_hu *a) +{ + REQUIRE_EXT(ctx, RVH); +#ifndef CONFIG_USER_ONLY + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv mem_idx = tcg_temp_new(); + TCGv memop = tcg_temp_new(); + + gen_get_gpr(t0, a->rs1); + tcg_gen_movi_tl(mem_idx, ctx->mem_idx); + tcg_gen_movi_tl(memop, MO_TEUW); + + gen_helper_hyp_load(t1, cpu_env, t0, mem_idx, memop); + gen_set_gpr(a->rd, t1); + + tcg_temp_free(t0); + tcg_temp_free(t1); + tcg_temp_free(mem_idx); + tcg_temp_free(memop); + return true; +#else + return false; +#endif +} + +static bool trans_hsv_b(DisasContext *ctx, arg_hsv_b *a) +{ + REQUIRE_EXT(ctx, RVH); +#ifndef CONFIG_USER_ONLY + TCGv t0 = tcg_temp_new(); + TCGv dat = tcg_temp_new(); + TCGv mem_idx = tcg_temp_new(); + TCGv memop = tcg_temp_new(); + + gen_get_gpr(t0, a->rs1); + gen_get_gpr(dat, a->rs2); + tcg_gen_movi_tl(mem_idx, ctx->mem_idx); + tcg_gen_movi_tl(memop, MO_SB); + + gen_helper_hyp_store(cpu_env, t0, dat, mem_idx, memop); + + tcg_temp_free(t0); + tcg_temp_free(dat); + tcg_temp_free(mem_idx); + tcg_temp_free(memop); + return true; +#else + return false; +#endif +} + +static bool trans_hsv_h(DisasContext *ctx, arg_hsv_h *a) +{ + REQUIRE_EXT(ctx, RVH); +#ifndef CONFIG_USER_ONLY + TCGv t0 = tcg_temp_new(); + TCGv dat = tcg_temp_new(); + TCGv mem_idx = tcg_temp_new(); + TCGv memop = tcg_temp_new(); + + 
gen_get_gpr(t0, a->rs1); + gen_get_gpr(dat, a->rs2); + tcg_gen_movi_tl(mem_idx, ctx->mem_idx); + tcg_gen_movi_tl(memop, MO_TESW); + + gen_helper_hyp_store(cpu_env, t0, dat, mem_idx, memop); + + tcg_temp_free(t0); + tcg_temp_free(dat); + tcg_temp_free(mem_idx); + tcg_temp_free(memop); + return true; +#else + return false; +#endif +} + +static bool trans_hsv_w(DisasContext *ctx, arg_hsv_w *a) +{ + REQUIRE_EXT(ctx, RVH); +#ifndef CONFIG_USER_ONLY + TCGv t0 = tcg_temp_new(); + TCGv dat = tcg_temp_new(); + TCGv mem_idx = tcg_temp_new(); + TCGv memop = tcg_temp_new(); + + gen_get_gpr(t0, a->rs1); + gen_get_gpr(dat, a->rs2); + tcg_gen_movi_tl(mem_idx, ctx->mem_idx); + tcg_gen_movi_tl(memop, MO_TESL); + + gen_helper_hyp_store(cpu_env, t0, dat, mem_idx, memop); + + tcg_temp_free(t0); + tcg_temp_free(dat); + tcg_temp_free(mem_idx); + tcg_temp_free(memop); + return true; +#else + return false; +#endif +} + +#ifdef TARGET_RISCV64 +static bool trans_hlv_wu(DisasContext *ctx, arg_hlv_wu *a) +{ + REQUIRE_EXT(ctx, RVH); +#ifndef CONFIG_USER_ONLY + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv mem_idx = tcg_temp_new(); + TCGv memop = tcg_temp_new(); + + gen_get_gpr(t0, a->rs1); + tcg_gen_movi_tl(mem_idx, ctx->mem_idx); + tcg_gen_movi_tl(memop, MO_TEUL); + + gen_helper_hyp_load(t1, cpu_env, t0, mem_idx, memop); + gen_set_gpr(a->rd, t1); + + tcg_temp_free(t0); + tcg_temp_free(t1); + tcg_temp_free(mem_idx); + tcg_temp_free(memop); + return true; +#else + return false; +#endif +} + +static bool trans_hlv_d(DisasContext *ctx, arg_hlv_d *a) +{ + REQUIRE_EXT(ctx, RVH); +#ifndef CONFIG_USER_ONLY + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv mem_idx = tcg_temp_new(); + TCGv memop = tcg_temp_new(); + + gen_get_gpr(t0, a->rs1); + tcg_gen_movi_tl(mem_idx, ctx->mem_idx); + tcg_gen_movi_tl(memop, MO_TEQ); + + gen_helper_hyp_load(t1, cpu_env, t0, mem_idx, memop); + gen_set_gpr(a->rd, t1); + + tcg_temp_free(t0); + tcg_temp_free(t1); + tcg_temp_free(mem_idx); + 
tcg_temp_free(memop); + return true; +#else + return false; +#endif +} + +static bool trans_hsv_d(DisasContext *ctx, arg_hsv_d *a) +{ + REQUIRE_EXT(ctx, RVH); +#ifndef CONFIG_USER_ONLY + TCGv t0 = tcg_temp_new(); + TCGv dat = tcg_temp_new(); + TCGv mem_idx = tcg_temp_new(); + TCGv memop = tcg_temp_new(); + + gen_get_gpr(t0, a->rs1); + gen_get_gpr(dat, a->rs2); + tcg_gen_movi_tl(mem_idx, ctx->mem_idx); + tcg_gen_movi_tl(memop, MO_TEQ); + + gen_helper_hyp_store(cpu_env, t0, dat, mem_idx, memop); + + tcg_temp_free(t0); + tcg_temp_free(dat); + tcg_temp_free(mem_idx); + tcg_temp_free(memop); + return true; +#else + return false; +#endif +} +#endif + +static bool trans_hlvx_hu(DisasContext *ctx, arg_hlvx_hu *a) +{ + REQUIRE_EXT(ctx, RVH); +#ifndef CONFIG_USER_ONLY + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv mem_idx = tcg_temp_new(); + TCGv memop = tcg_temp_new(); + + gen_get_gpr(t0, a->rs1); + tcg_gen_movi_tl(mem_idx, ctx->mem_idx); + tcg_gen_movi_tl(memop, MO_TEUW); + + gen_helper_hyp_x_load(t1, cpu_env, t0, mem_idx, memop); + gen_set_gpr(a->rd, t1); + + tcg_temp_free(t0); + tcg_temp_free(t1); + tcg_temp_free(mem_idx); + tcg_temp_free(memop); + return true; +#else + return false; +#endif +} + +static bool trans_hlvx_wu(DisasContext *ctx, arg_hlvx_wu *a) +{ + REQUIRE_EXT(ctx, RVH); +#ifndef CONFIG_USER_ONLY + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv mem_idx = tcg_temp_new(); + TCGv memop = tcg_temp_new(); + + gen_get_gpr(t0, a->rs1); + tcg_gen_movi_tl(mem_idx, ctx->mem_idx); + tcg_gen_movi_tl(memop, MO_TEUL); + + gen_helper_hyp_x_load(t1, cpu_env, t0, mem_idx, memop); + gen_set_gpr(a->rd, t1); + + tcg_temp_free(t0); + tcg_temp_free(t1); + tcg_temp_free(mem_idx); + tcg_temp_free(memop); + return true; +#else + return false; +#endif +} + static bool trans_hfence_gvma(DisasContext *ctx, arg_sfence_vma *a) { REQUIRE_EXT(ctx, RVH); #ifndef CONFIG_USER_ONLY - gen_helper_hyp_tlb_flush(cpu_env); + gen_helper_hyp_gvma_tlb_flush(cpu_env); 
return true; #endif return false; diff --git a/target/riscv/op_helper.c b/target/riscv/op_helper.c index 7cccd42a1e..9b9ada45a9 100644 --- a/target/riscv/op_helper.c +++ b/target/riscv/op_helper.c @@ -43,8 +43,10 @@ target_ulong helper_csrrw(CPURISCVState *env, target_ulong src, target_ulong csr) { target_ulong val = 0; - if (riscv_csrrw(env, csr, &val, src, -1) < 0) { - riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + int ret = riscv_csrrw(env, csr, &val, src, -1); + + if (ret < 0) { + riscv_raise_exception(env, -ret, GETPC()); } return val; } @@ -53,8 +55,10 @@ target_ulong helper_csrrs(CPURISCVState *env, target_ulong src, target_ulong csr, target_ulong rs1_pass) { target_ulong val = 0; - if (riscv_csrrw(env, csr, &val, -1, rs1_pass ? src : 0) < 0) { - riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + int ret = riscv_csrrw(env, csr, &val, -1, rs1_pass ? src : 0); + + if (ret < 0) { + riscv_raise_exception(env, -ret, GETPC()); } return val; } @@ -63,8 +67,10 @@ target_ulong helper_csrrc(CPURISCVState *env, target_ulong src, target_ulong csr, target_ulong rs1_pass) { target_ulong val = 0; - if (riscv_csrrw(env, csr, &val, 0, rs1_pass ? src : 0) < 0) { - riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + int ret = riscv_csrrw(env, csr, &val, 0, rs1_pass ? 
src : 0); + + if (ret < 0) { + riscv_raise_exception(env, -ret, GETPC()); } return val; } @@ -88,6 +94,11 @@ target_ulong helper_sret(CPURISCVState *env, target_ulong cpu_pc_deb) riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); } + if (riscv_has_ext(env, RVH) && riscv_cpu_virt_enabled(env) && + get_field(env->hstatus, HSTATUS_VTSR)) { + riscv_raise_exception(env, RISCV_EXCP_VIRT_INSTRUCTION_FAULT, GETPC()); + } + mstatus = env->mstatus; if (riscv_has_ext(env, RVH) && !riscv_cpu_virt_enabled(env)) { @@ -97,12 +108,8 @@ target_ulong helper_sret(CPURISCVState *env, target_ulong cpu_pc_deb) prev_priv = get_field(mstatus, MSTATUS_SPP); prev_virt = get_field(hstatus, HSTATUS_SPV); - hstatus = set_field(hstatus, HSTATUS_SPV, - get_field(hstatus, HSTATUS_SP2V)); - mstatus = set_field(mstatus, MSTATUS_SPP, - get_field(hstatus, HSTATUS_SP2P)); - hstatus = set_field(hstatus, HSTATUS_SP2V, 0); - hstatus = set_field(hstatus, HSTATUS_SP2P, 0); + hstatus = set_field(hstatus, HSTATUS_SPV, 0); + mstatus = set_field(mstatus, MSTATUS_SPP, 0); mstatus = set_field(mstatus, SSTATUS_SIE, get_field(mstatus, SSTATUS_SPIE)); mstatus = set_field(mstatus, SSTATUS_SPIE, 1); @@ -174,7 +181,7 @@ void helper_wfi(CPURISCVState *env) if ((env->priv == PRV_S && get_field(env->mstatus, MSTATUS_TW)) || riscv_cpu_virt_enabled(env)) { - riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + riscv_raise_exception(env, RISCV_EXCP_VIRT_INSTRUCTION_FAULT, GETPC()); } else { cs->halted = 1; cs->exception_index = EXCP_HLT; @@ -189,6 +196,9 @@ void helper_tlb_flush(CPURISCVState *env) (env->priv == PRV_S && get_field(env->mstatus, MSTATUS_TVM))) { riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + } else if (riscv_has_ext(env, RVH) && riscv_cpu_virt_enabled(env) && + get_field(env->hstatus, HSTATUS_VTVM)) { + riscv_raise_exception(env, RISCV_EXCP_VIRT_INSTRUCTION_FAULT, GETPC()); } else { tlb_flush(cs); } @@ -198,6 +208,10 @@ void helper_hyp_tlb_flush(CPURISCVState *env) { 
CPUState *cs = env_cpu(env); + if (env->priv == PRV_S && riscv_cpu_virt_enabled(env)) { + riscv_raise_exception(env, RISCV_EXCP_VIRT_INSTRUCTION_FAULT, GETPC()); + } + if (env->priv == PRV_M || (env->priv == PRV_S && !riscv_cpu_virt_enabled(env))) { tlb_flush(cs); @@ -207,4 +221,140 @@ void helper_hyp_tlb_flush(CPURISCVState *env) riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); } +void helper_hyp_gvma_tlb_flush(CPURISCVState *env) +{ + if (env->priv == PRV_S && !riscv_cpu_virt_enabled(env) && + get_field(env->mstatus, MSTATUS_TVM)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + } + + helper_hyp_tlb_flush(env); +} + +target_ulong helper_hyp_load(CPURISCVState *env, target_ulong address, + target_ulong attrs, target_ulong memop) +{ + if (env->priv == PRV_M || + (env->priv == PRV_S && !riscv_cpu_virt_enabled(env)) || + (env->priv == PRV_U && !riscv_cpu_virt_enabled(env) && + get_field(env->hstatus, HSTATUS_HU))) { + target_ulong pte; + + riscv_cpu_set_two_stage_lookup(env, true); + + switch (memop) { + case MO_SB: + pte = cpu_ldsb_data_ra(env, address, GETPC()); + break; + case MO_UB: + pte = cpu_ldub_data_ra(env, address, GETPC()); + break; + case MO_TESW: + pte = cpu_ldsw_data_ra(env, address, GETPC()); + break; + case MO_TEUW: + pte = cpu_lduw_data_ra(env, address, GETPC()); + break; + case MO_TESL: + pte = cpu_ldl_data_ra(env, address, GETPC()); + break; + case MO_TEUL: + pte = cpu_ldl_data_ra(env, address, GETPC()); + break; + case MO_TEQ: + pte = cpu_ldq_data_ra(env, address, GETPC()); + break; + default: + g_assert_not_reached(); + } + + riscv_cpu_set_two_stage_lookup(env, false); + + return pte; + } + + if (riscv_cpu_virt_enabled(env)) { + riscv_raise_exception(env, RISCV_EXCP_VIRT_INSTRUCTION_FAULT, GETPC()); + } else { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + } + return 0; +} + +void helper_hyp_store(CPURISCVState *env, target_ulong address, + target_ulong val, target_ulong attrs, target_ulong 
memop) +{ + if (env->priv == PRV_M || + (env->priv == PRV_S && !riscv_cpu_virt_enabled(env)) || + (env->priv == PRV_U && !riscv_cpu_virt_enabled(env) && + get_field(env->hstatus, HSTATUS_HU))) { + riscv_cpu_set_two_stage_lookup(env, true); + + switch (memop) { + case MO_SB: + case MO_UB: + cpu_stb_data_ra(env, address, val, GETPC()); + break; + case MO_TESW: + case MO_TEUW: + cpu_stw_data_ra(env, address, val, GETPC()); + break; + case MO_TESL: + case MO_TEUL: + cpu_stl_data_ra(env, address, val, GETPC()); + break; + case MO_TEQ: + cpu_stq_data_ra(env, address, val, GETPC()); + break; + default: + g_assert_not_reached(); + } + + riscv_cpu_set_two_stage_lookup(env, false); + + return; + } + + if (riscv_cpu_virt_enabled(env)) { + riscv_raise_exception(env, RISCV_EXCP_VIRT_INSTRUCTION_FAULT, GETPC()); + } else { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + } +} + +target_ulong helper_hyp_x_load(CPURISCVState *env, target_ulong address, + target_ulong attrs, target_ulong memop) +{ + if (env->priv == PRV_M || + (env->priv == PRV_S && !riscv_cpu_virt_enabled(env)) || + (env->priv == PRV_U && !riscv_cpu_virt_enabled(env) && + get_field(env->hstatus, HSTATUS_HU))) { + target_ulong pte; + + riscv_cpu_set_two_stage_lookup(env, true); + + switch (memop) { + case MO_TEUL: + pte = cpu_ldub_data_ra(env, address, GETPC()); + break; + case MO_TEUW: + pte = cpu_lduw_data_ra(env, address, GETPC()); + break; + default: + g_assert_not_reached(); + } + + riscv_cpu_set_two_stage_lookup(env, false); + + return pte; + } + + if (riscv_cpu_virt_enabled(env)) { + riscv_raise_exception(env, RISCV_EXCP_VIRT_INSTRUCTION_FAULT, GETPC()); + } else { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + } + return 0; +} + #endif /* !CONFIG_USER_ONLY */ diff --git a/target/riscv/translate.c b/target/riscv/translate.c index 3919f570f7..79dca2291b 100644 --- a/target/riscv/translate.c +++ b/target/riscv/translate.c @@ -797,16 +797,6 @@ static void 
riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) #if !defined(CONFIG_USER_ONLY) if (riscv_has_ext(env, RVH)) { ctx->virt_enabled = riscv_cpu_virt_enabled(env); - if (env->priv_ver == PRV_M && - get_field(env->mstatus, MSTATUS_MPRV) && - MSTATUS_MPV_ISSET(env)) { - ctx->virt_enabled = true; - } else if (env->priv == PRV_S && - !riscv_cpu_virt_enabled(env) && - get_field(env->hstatus, HSTATUS_SPRV) && - get_field(env->hstatus, HSTATUS_SPV)) { - ctx->virt_enabled = true; - } } else { ctx->virt_enabled = false; } diff --git a/tests/data/acpi/disassemle-aml.sh b/tests/data/acpi/disassemle-aml.sh index 1d8a4d0301..253b7620a0 100755 --- a/tests/data/acpi/disassemle-aml.sh +++ b/tests/data/acpi/disassemle-aml.sh @@ -42,11 +42,16 @@ do else extra="" fi - asl=${aml}.dsl if [[ "${outdir}" ]]; then - asl="${outdir}"/${machine}/${asl} + # iasl strips an extension from prefix if there. + # since we have some files with . in the name, the + # last component gets interpreted as an extension: + # add another extension to work around that. 
+ prefix="-p ${outdir}/${aml}.dsl" + else + prefix="" fi - iasl -d -p ${asl} ${extra} ${aml} + iasl ${extra} ${prefix} -d ${aml} done done diff --git a/tests/data/acpi/pc/DSDT b/tests/data/acpi/pc/DSDT index 6d0aaf729a..b121bb5bc1 100644 --- a/tests/data/acpi/pc/DSDT +++ b/tests/data/acpi/pc/DSDT Binary files differdiff --git a/tests/data/acpi/pc/DSDT.acpihmat b/tests/data/acpi/pc/DSDT.acpihmat index 2e5e02400b..b0dbb943f4 100644 --- a/tests/data/acpi/pc/DSDT.acpihmat +++ b/tests/data/acpi/pc/DSDT.acpihmat Binary files differdiff --git a/tests/data/acpi/pc/DSDT.bridge b/tests/data/acpi/pc/DSDT.bridge index 623c4c0358..7b6c7a4787 100644 --- a/tests/data/acpi/pc/DSDT.bridge +++ b/tests/data/acpi/pc/DSDT.bridge Binary files differdiff --git a/tests/data/acpi/pc/DSDT.cphp b/tests/data/acpi/pc/DSDT.cphp index e0a43ccdad..c0e8aa5b32 100644 --- a/tests/data/acpi/pc/DSDT.cphp +++ b/tests/data/acpi/pc/DSDT.cphp Binary files differdiff --git a/tests/data/acpi/pc/DSDT.dimmpxm b/tests/data/acpi/pc/DSDT.dimmpxm index 21eb065a0e..1649953b6c 100644 --- a/tests/data/acpi/pc/DSDT.dimmpxm +++ b/tests/data/acpi/pc/DSDT.dimmpxm Binary files differdiff --git a/tests/data/acpi/pc/DSDT.ipmikcs b/tests/data/acpi/pc/DSDT.ipmikcs index b8f08f266b..92748d49dc 100644 --- a/tests/data/acpi/pc/DSDT.ipmikcs +++ b/tests/data/acpi/pc/DSDT.ipmikcs Binary files differdiff --git a/tests/data/acpi/pc/DSDT.memhp b/tests/data/acpi/pc/DSDT.memhp index 9a9418f4bd..4026772906 100644 --- a/tests/data/acpi/pc/DSDT.memhp +++ b/tests/data/acpi/pc/DSDT.memhp Binary files differdiff --git a/tests/data/acpi/pc/DSDT.numamem b/tests/data/acpi/pc/DSDT.numamem index 6eec385c2e..4d9ba337a8 100644 --- a/tests/data/acpi/pc/DSDT.numamem +++ b/tests/data/acpi/pc/DSDT.numamem Binary files differdiff --git a/tests/data/acpi/q35/DSDT b/tests/data/acpi/q35/DSDT index e63676d7a6..bba8884073 100644 --- a/tests/data/acpi/q35/DSDT +++ b/tests/data/acpi/q35/DSDT Binary files differdiff --git a/tests/data/acpi/q35/DSDT.acpihmat 
b/tests/data/acpi/q35/DSDT.acpihmat index cd97b81982..9cac92418b 100644 --- a/tests/data/acpi/q35/DSDT.acpihmat +++ b/tests/data/acpi/q35/DSDT.acpihmat Binary files differdiff --git a/tests/data/acpi/q35/DSDT.bridge b/tests/data/acpi/q35/DSDT.bridge index 8b0fb497db..f08b7245f5 100644 --- a/tests/data/acpi/q35/DSDT.bridge +++ b/tests/data/acpi/q35/DSDT.bridge Binary files differdiff --git a/tests/data/acpi/q35/DSDT.cphp b/tests/data/acpi/q35/DSDT.cphp index d9bb414e9b..57d859cef9 100644 --- a/tests/data/acpi/q35/DSDT.cphp +++ b/tests/data/acpi/q35/DSDT.cphp Binary files differdiff --git a/tests/data/acpi/q35/DSDT.dimmpxm b/tests/data/acpi/q35/DSDT.dimmpxm index 29f19b22a3..9d5bd5744e 100644 --- a/tests/data/acpi/q35/DSDT.dimmpxm +++ b/tests/data/acpi/q35/DSDT.dimmpxm Binary files differdiff --git a/tests/data/acpi/q35/DSDT.ipmibt b/tests/data/acpi/q35/DSDT.ipmibt index e8dea1ea42..5cd11de6a8 100644 --- a/tests/data/acpi/q35/DSDT.ipmibt +++ b/tests/data/acpi/q35/DSDT.ipmibt Binary files differdiff --git a/tests/data/acpi/q35/DSDT.memhp b/tests/data/acpi/q35/DSDT.memhp index dca76db15b..05a7a73ec4 100644 --- a/tests/data/acpi/q35/DSDT.memhp +++ b/tests/data/acpi/q35/DSDT.memhp Binary files differdiff --git a/tests/data/acpi/q35/DSDT.mmio64 b/tests/data/acpi/q35/DSDT.mmio64 index 6d8facd9e1..efd3f1188f 100644 --- a/tests/data/acpi/q35/DSDT.mmio64 +++ b/tests/data/acpi/q35/DSDT.mmio64 Binary files differdiff --git a/tests/data/acpi/q35/DSDT.numamem b/tests/data/acpi/q35/DSDT.numamem index 737325dc30..1978b55f12 100644 --- a/tests/data/acpi/q35/DSDT.numamem +++ b/tests/data/acpi/q35/DSDT.numamem Binary files differdiff --git a/tests/data/acpi/q35/DSDT.tis b/tests/data/acpi/q35/DSDT.tis index 27ee927fc5..638de38726 100644 --- a/tests/data/acpi/q35/DSDT.tis +++ b/tests/data/acpi/q35/DSDT.tis Binary files differdiff --git a/tests/data/acpi/virt/DSDT b/tests/data/acpi/virt/DSDT index e669508d17..9b002836f3 100644 --- a/tests/data/acpi/virt/DSDT +++ 
b/tests/data/acpi/virt/DSDT Binary files differdiff --git a/tests/data/acpi/virt/DSDT.memhp b/tests/data/acpi/virt/DSDT.memhp index 4cb81f692d..545a18c365 100644 --- a/tests/data/acpi/virt/DSDT.memhp +++ b/tests/data/acpi/virt/DSDT.memhp Binary files differdiff --git a/tests/data/acpi/virt/DSDT.numamem b/tests/data/acpi/virt/DSDT.numamem index e669508d17..9b002836f3 100644 --- a/tests/data/acpi/virt/DSDT.numamem +++ b/tests/data/acpi/virt/DSDT.numamem Binary files differdiff --git a/tests/qemu-iotests/031.out b/tests/qemu-iotests/031.out index 4b21d6a9ba..0054c2ed97 100644 --- a/tests/qemu-iotests/031.out +++ b/tests/qemu-iotests/031.out @@ -117,7 +117,7 @@ header_length 112 Header extension: magic 0x6803f857 (Feature table) -length 336 +length 384 data <binary> Header extension: @@ -150,7 +150,7 @@ header_length 112 Header extension: magic 0x6803f857 (Feature table) -length 336 +length 384 data <binary> Header extension: @@ -164,7 +164,7 @@ No errors were found on the image. magic 0x514649fb version 3 -backing_file_offset 0x210 +backing_file_offset 0x240 backing_file_size 0x17 cluster_bits 16 size 67108864 @@ -188,7 +188,7 @@ data 'host_device' Header extension: magic 0x6803f857 (Feature table) -length 336 +length 384 data <binary> Header extension: diff --git a/tests/qemu-iotests/036.out b/tests/qemu-iotests/036.out index a9bed828e5..1fa7cad28d 100644 --- a/tests/qemu-iotests/036.out +++ b/tests/qemu-iotests/036.out @@ -26,7 +26,7 @@ compatible_features [] autoclear_features [63] Header extension: magic 0x6803f857 (Feature table) -length 336 +length 384 data <binary> @@ -38,7 +38,7 @@ compatible_features [] autoclear_features [] Header extension: magic 0x6803f857 (Feature table) -length 336 +length 384 data <binary> *** done diff --git a/tests/qemu-iotests/049.out b/tests/qemu-iotests/049.out index 4c21dc70a5..a7e220830d 100644 --- a/tests/qemu-iotests/049.out +++ b/tests/qemu-iotests/049.out @@ -4,90 +4,90 @@ QA output created by 049 == 1. 
Traditional size parameter == qemu-img create -f qcow2 TEST_DIR/t.qcow2 1024 -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 TEST_DIR/t.qcow2 1024b -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 TEST_DIR/t.qcow2 1k -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 TEST_DIR/t.qcow2 1K -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 TEST_DIR/t.qcow2 1M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=1048576 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1048576 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 TEST_DIR/t.qcow2 1G -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=1073741824 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1073741824 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 
TEST_DIR/t.qcow2 1T -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=1099511627776 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1099511627776 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 TEST_DIR/t.qcow2 1024.0 -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 TEST_DIR/t.qcow2 1024.0b -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 TEST_DIR/t.qcow2 1.5k -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=1536 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1536 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 TEST_DIR/t.qcow2 1.5K -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=1536 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1536 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 TEST_DIR/t.qcow2 1.5M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=1572864 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1572864 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 TEST_DIR/t.qcow2 1.5G -Formatting 
'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=1610612736 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1610612736 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 TEST_DIR/t.qcow2 1.5T -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=1649267441664 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1649267441664 lazy_refcounts=off refcount_bits=16 == 2. Specifying size via -o == qemu-img create -f qcow2 -o size=1024 TEST_DIR/t.qcow2 -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o size=1024b TEST_DIR/t.qcow2 -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o size=1k TEST_DIR/t.qcow2 -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o size=1K TEST_DIR/t.qcow2 -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o size=1M 
TEST_DIR/t.qcow2 -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=1048576 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1048576 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o size=1G TEST_DIR/t.qcow2 -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=1073741824 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1073741824 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o size=1T TEST_DIR/t.qcow2 -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=1099511627776 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1099511627776 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o size=1024.0 TEST_DIR/t.qcow2 -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o size=1024.0b TEST_DIR/t.qcow2 -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o size=1.5k TEST_DIR/t.qcow2 -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=1536 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1536 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 
-o size=1.5K TEST_DIR/t.qcow2 -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=1536 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1536 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o size=1.5M TEST_DIR/t.qcow2 -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=1572864 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1572864 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o size=1.5G TEST_DIR/t.qcow2 -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=1610612736 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1610612736 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o size=1.5T TEST_DIR/t.qcow2 -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=1649267441664 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1649267441664 lazy_refcounts=off refcount_bits=16 == 3. 
Invalid sizes == @@ -129,84 +129,84 @@ qemu-img: TEST_DIR/t.qcow2: The image size must be specified only once == Check correct interpretation of suffixes for cluster size == qemu-img create -f qcow2 -o cluster_size=1024 TEST_DIR/t.qcow2 64M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=1024 compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=1024 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o cluster_size=1024b TEST_DIR/t.qcow2 64M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=1024 compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=1024 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o cluster_size=1k TEST_DIR/t.qcow2 64M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=1024 compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=1024 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o cluster_size=1K TEST_DIR/t.qcow2 64M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=1024 compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=1024 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o cluster_size=1M TEST_DIR/t.qcow2 64M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=1048576 compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=1048576 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o cluster_size=1024.0 TEST_DIR/t.qcow2 64M -Formatting 
'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=1024 compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=1024 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o cluster_size=1024.0b TEST_DIR/t.qcow2 64M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=1024 compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=1024 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o cluster_size=0.5k TEST_DIR/t.qcow2 64M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=512 compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=512 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o cluster_size=0.5K TEST_DIR/t.qcow2 64M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=512 compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=512 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o cluster_size=0.5M TEST_DIR/t.qcow2 64M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=524288 compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=524288 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 == Check compat level option == qemu-img create -f qcow2 -o compat=0.10 TEST_DIR/t.qcow2 64M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=67108864 compat=0.10 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib 
size=67108864 compat=0.10 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o compat=1.1 TEST_DIR/t.qcow2 64M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=67108864 compat=1.1 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=1.1 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o compat=0.42 TEST_DIR/t.qcow2 64M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=67108864 compat=0.42 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=0.42 lazy_refcounts=off refcount_bits=16 qemu-img: TEST_DIR/t.qcow2: Invalid parameter '0.42' qemu-img create -f qcow2 -o compat=foobar TEST_DIR/t.qcow2 64M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=67108864 compat=foobar lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=foobar lazy_refcounts=off refcount_bits=16 qemu-img: TEST_DIR/t.qcow2: Invalid parameter 'foobar' == Check preallocation option == qemu-img create -f qcow2 -o preallocation=off TEST_DIR/t.qcow2 64M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 preallocation=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off preallocation=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o preallocation=metadata TEST_DIR/t.qcow2 64M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 preallocation=metadata compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off 
preallocation=metadata compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o preallocation=1234 TEST_DIR/t.qcow2 64M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 preallocation=1234 compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off preallocation=1234 compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 qemu-img: TEST_DIR/t.qcow2: Invalid parameter '1234' == Check encryption option == qemu-img create -f qcow2 -o encryption=off TEST_DIR/t.qcow2 64M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 encryption=off cluster_size=65536 compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 encryption=off cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 --object secret,id=sec0,data=123456 -o encryption=on,encrypt.key-secret=sec0 TEST_DIR/t.qcow2 64M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 encryption=on encrypt.key-secret=sec0 cluster_size=65536 compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 encryption=on encrypt.key-secret=sec0 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 == Check lazy_refcounts option (only with v3) == qemu-img create -f qcow2 -o compat=1.1,lazy_refcounts=off TEST_DIR/t.qcow2 64M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=67108864 compat=1.1 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=1.1 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o compat=1.1,lazy_refcounts=on TEST_DIR/t.qcow2 64M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 
compression_type=zlib size=67108864 compat=1.1 lazy_refcounts=on refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=1.1 lazy_refcounts=on refcount_bits=16 qemu-img create -f qcow2 -o compat=0.10,lazy_refcounts=off TEST_DIR/t.qcow2 64M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=67108864 compat=0.10 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=0.10 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o compat=0.10,lazy_refcounts=on TEST_DIR/t.qcow2 64M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=67108864 compat=0.10 lazy_refcounts=on refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=0.10 lazy_refcounts=on refcount_bits=16 qemu-img: TEST_DIR/t.qcow2: Lazy refcounts only supported with compatibility level 1.1 and above (use version=v3 or greater) *** done diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out index e574c38797..b74540bafb 100644 --- a/tests/qemu-iotests/060.out +++ b/tests/qemu-iotests/060.out @@ -21,6 +21,7 @@ Format specific information: lazy refcounts: false refcount bits: 16 corrupt: true + extended l2: false qemu-io: can't open device TEST_DIR/t.IMGFMT: IMGFMT: Image is corrupt; cannot be opened read/write no file open, try 'help open' read 512/512 bytes at offset 0 @@ -320,7 +321,7 @@ discard 65536/65536 bytes at offset 0 qcow2: Marking image as corrupt: Preallocated zero cluster offset 0x2a00 unaligned (guest offset: 0); further corruption events will be suppressed write failed: Input/output error --- Repairing --- -Repairing offset=2a00: Preallocated zero cluster is not properly aligned; L2 entry corrupted. 
+Repairing offset=2a00: Preallocated cluster is not properly aligned; L2 entry corrupted. The following inconsistencies were found and repaired: 0 leaked clusters diff --git a/tests/qemu-iotests/061 b/tests/qemu-iotests/061 index 08ddbdd10c..5747beb7ed 100755 --- a/tests/qemu-iotests/061 +++ b/tests/qemu-iotests/061 @@ -304,6 +304,12 @@ _img_info --format-specific _check_test_img echo +echo "=== Testing version downgrade with extended L2 entries ===" +echo +_make_test_img -o "compat=1.1,extended_l2=on" 64M +$QEMU_IMG amend -o "compat=0.10" "$TEST_IMG" + +echo echo "=== Try changing the external data file ===" echo _make_test_img -o "compat=1.1" 64M diff --git a/tests/qemu-iotests/061.out b/tests/qemu-iotests/061.out index b2d2dfed04..ee30da2665 100644 --- a/tests/qemu-iotests/061.out +++ b/tests/qemu-iotests/061.out @@ -26,7 +26,7 @@ header_length 112 Header extension: magic 0x6803f857 (Feature table) -length 336 +length 384 data <binary> magic 0x514649fb @@ -84,7 +84,7 @@ header_length 112 Header extension: magic 0x6803f857 (Feature table) -length 336 +length 384 data <binary> magic 0x514649fb @@ -140,7 +140,7 @@ header_length 112 Header extension: magic 0x6803f857 (Feature table) -length 336 +length 384 data <binary> ERROR cluster 5 refcount=0 reference=1 @@ -195,7 +195,7 @@ header_length 112 Header extension: magic 0x6803f857 (Feature table) -length 336 +length 384 data <binary> magic 0x514649fb @@ -264,7 +264,7 @@ header_length 112 Header extension: magic 0x6803f857 (Feature table) -length 336 +length 384 data <binary> read 65536/65536 bytes at offset 44040192 @@ -326,7 +326,7 @@ header_length 112 Header extension: magic 0x6803f857 (Feature table) -length 336 +length 384 data <binary> ERROR cluster 5 refcount=0 reference=1 @@ -355,7 +355,7 @@ header_length 112 Header extension: magic 0x6803f857 (Feature table) -length 336 +length 384 data <binary> read 131072/131072 bytes at offset 0 @@ -530,8 +530,14 @@ Format specific information: data file: 
TEST_DIR/t.IMGFMT.data data file raw: false corrupt: false + extended l2: false No errors were found on the image. +=== Testing version downgrade with extended L2 entries === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +qemu-img: Cannot downgrade an image with incompatible features 0x10 set + === Try changing the external data file === Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 @@ -551,6 +557,7 @@ Format specific information: data file: foo data file raw: false corrupt: false + extended l2: false qemu-img: Could not open 'TEST_DIR/t.IMGFMT': 'data-file' is required for this image image: TEST_DIR/t.IMGFMT @@ -564,6 +571,7 @@ Format specific information: refcount bits: 16 data file raw: false corrupt: false + extended l2: false === Clearing and setting data-file-raw === @@ -580,6 +588,7 @@ Format specific information: data file: TEST_DIR/t.IMGFMT.data data file raw: true corrupt: false + extended l2: false No errors were found on the image. image: TEST_DIR/t.IMGFMT file format: IMGFMT @@ -593,6 +602,7 @@ Format specific information: data file: TEST_DIR/t.IMGFMT.data data file raw: false corrupt: false + extended l2: false No errors were found on the image. qemu-img: data-file-raw cannot be set on existing images image: TEST_DIR/t.IMGFMT @@ -607,5 +617,6 @@ Format specific information: data file: TEST_DIR/t.IMGFMT.data data file raw: false corrupt: false + extended l2: false No errors were found on the image. 
*** done diff --git a/tests/qemu-iotests/065 b/tests/qemu-iotests/065 index 18dc488c7a..29a7f7ad60 100755 --- a/tests/qemu-iotests/065 +++ b/tests/qemu-iotests/065 @@ -98,20 +98,20 @@ class TestQCow3NotLazy(TestQemuImgInfo): img_options = 'compat=1.1,lazy_refcounts=off' json_compare = { 'compat': '1.1', 'lazy-refcounts': False, 'refcount-bits': 16, 'corrupt': False, - 'compression-type': 'zlib' } + 'compression-type': 'zlib', 'extended-l2': False } human_compare = [ 'compat: 1.1', 'compression type: zlib', 'lazy refcounts: false', 'refcount bits: 16', - 'corrupt: false' ] + 'corrupt: false', 'extended l2: false' ] class TestQCow3Lazy(TestQemuImgInfo): '''Testing a qcow2 version 3 image with lazy refcounts enabled''' img_options = 'compat=1.1,lazy_refcounts=on' json_compare = { 'compat': '1.1', 'lazy-refcounts': True, 'refcount-bits': 16, 'corrupt': False, - 'compression-type': 'zlib' } + 'compression-type': 'zlib', 'extended-l2': False } human_compare = [ 'compat: 1.1', 'compression type: zlib', 'lazy refcounts: true', 'refcount bits: 16', - 'corrupt: false' ] + 'corrupt: false', 'extended l2: false' ] class TestQCow3NotLazyQMP(TestQMP): '''Testing a qcow2 version 3 image with lazy refcounts disabled, opening @@ -120,7 +120,7 @@ class TestQCow3NotLazyQMP(TestQMP): qemu_options = 'lazy-refcounts=on' compare = { 'compat': '1.1', 'lazy-refcounts': False, 'refcount-bits': 16, 'corrupt': False, - 'compression-type': 'zlib' } + 'compression-type': 'zlib', 'extended-l2': False } class TestQCow3LazyQMP(TestQMP): @@ -130,7 +130,7 @@ class TestQCow3LazyQMP(TestQMP): qemu_options = 'lazy-refcounts=off' compare = { 'compat': '1.1', 'lazy-refcounts': True, 'refcount-bits': 16, 'corrupt': False, - 'compression-type': 'zlib' } + 'compression-type': 'zlib', 'extended-l2': False } TestImageInfoSpecific = None TestQemuImgInfo = None diff --git a/tests/qemu-iotests/082.out b/tests/qemu-iotests/082.out index 1728aff1e0..b70c12c139 100644 --- a/tests/qemu-iotests/082.out +++ 
b/tests/qemu-iotests/082.out @@ -3,14 +3,14 @@ QA output created by 082 === create: Options specified more than once === Testing: create -f foo -f qcow2 TEST_DIR/t.qcow2 128M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=134217728 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=134217728 lazy_refcounts=off refcount_bits=16 image: TEST_DIR/t.IMGFMT file format: IMGFMT virtual size: 128 MiB (134217728 bytes) cluster_size: 65536 Testing: create -f qcow2 -o cluster_size=4k -o lazy_refcounts=on TEST_DIR/t.qcow2 128M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=4096 compression_type=zlib size=134217728 lazy_refcounts=on refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=4096 extended_l2=off compression_type=zlib size=134217728 lazy_refcounts=on refcount_bits=16 image: TEST_DIR/t.IMGFMT file format: IMGFMT virtual size: 128 MiB (134217728 bytes) @@ -21,9 +21,10 @@ Format specific information: lazy refcounts: true refcount bits: 16 corrupt: false + extended l2: false Testing: create -f qcow2 -o cluster_size=4k -o lazy_refcounts=on -o cluster_size=8k TEST_DIR/t.qcow2 128M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=8192 compression_type=zlib size=134217728 lazy_refcounts=on refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=8192 extended_l2=off compression_type=zlib size=134217728 lazy_refcounts=on refcount_bits=16 image: TEST_DIR/t.IMGFMT file format: IMGFMT virtual size: 128 MiB (134217728 bytes) @@ -34,9 +35,10 @@ Format specific information: lazy refcounts: true refcount bits: 16 corrupt: false + extended l2: false Testing: create -f qcow2 -o cluster_size=4k,cluster_size=8k TEST_DIR/t.qcow2 128M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=8192 compression_type=zlib size=134217728 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 
cluster_size=8192 extended_l2=off compression_type=zlib size=134217728 lazy_refcounts=off refcount_bits=16 image: TEST_DIR/t.IMGFMT file format: IMGFMT virtual size: 128 MiB (134217728 bytes) @@ -62,6 +64,7 @@ Supported options: encrypt.ivgen-hash-alg=<str> - Name of IV generator hash algorithm encrypt.key-secret=<str> - ID of secret providing qcow AES key or LUKS passphrase encryption=<bool (on/off)> - Encrypt the image with format 'aes'. (Deprecated in favor of encrypt.format=aes) + extended_l2=<bool (on/off)> - Extended L2 tables extent_size_hint=<size> - Extent size hint for the image file, 0 to disable lazy_refcounts=<bool (on/off)> - Postpone refcount updates nocow=<bool (on/off)> - Turn off copy-on-write (valid only on btrfs) @@ -87,6 +90,7 @@ Supported options: encrypt.ivgen-hash-alg=<str> - Name of IV generator hash algorithm encrypt.key-secret=<str> - ID of secret providing qcow AES key or LUKS passphrase encryption=<bool (on/off)> - Encrypt the image with format 'aes'. (Deprecated in favor of encrypt.format=aes) + extended_l2=<bool (on/off)> - Extended L2 tables extent_size_hint=<size> - Extent size hint for the image file, 0 to disable lazy_refcounts=<bool (on/off)> - Postpone refcount updates nocow=<bool (on/off)> - Turn off copy-on-write (valid only on btrfs) @@ -112,6 +116,7 @@ Supported options: encrypt.ivgen-hash-alg=<str> - Name of IV generator hash algorithm encrypt.key-secret=<str> - ID of secret providing qcow AES key or LUKS passphrase encryption=<bool (on/off)> - Encrypt the image with format 'aes'. 
(Deprecated in favor of encrypt.format=aes) + extended_l2=<bool (on/off)> - Extended L2 tables extent_size_hint=<size> - Extent size hint for the image file, 0 to disable lazy_refcounts=<bool (on/off)> - Postpone refcount updates nocow=<bool (on/off)> - Turn off copy-on-write (valid only on btrfs) @@ -137,6 +142,7 @@ Supported options: encrypt.ivgen-hash-alg=<str> - Name of IV generator hash algorithm encrypt.key-secret=<str> - ID of secret providing qcow AES key or LUKS passphrase encryption=<bool (on/off)> - Encrypt the image with format 'aes'. (Deprecated in favor of encrypt.format=aes) + extended_l2=<bool (on/off)> - Extended L2 tables extent_size_hint=<size> - Extent size hint for the image file, 0 to disable lazy_refcounts=<bool (on/off)> - Postpone refcount updates nocow=<bool (on/off)> - Turn off copy-on-write (valid only on btrfs) @@ -162,6 +168,7 @@ Supported options: encrypt.ivgen-hash-alg=<str> - Name of IV generator hash algorithm encrypt.key-secret=<str> - ID of secret providing qcow AES key or LUKS passphrase encryption=<bool (on/off)> - Encrypt the image with format 'aes'. (Deprecated in favor of encrypt.format=aes) + extended_l2=<bool (on/off)> - Extended L2 tables extent_size_hint=<size> - Extent size hint for the image file, 0 to disable lazy_refcounts=<bool (on/off)> - Postpone refcount updates nocow=<bool (on/off)> - Turn off copy-on-write (valid only on btrfs) @@ -187,6 +194,7 @@ Supported options: encrypt.ivgen-hash-alg=<str> - Name of IV generator hash algorithm encrypt.key-secret=<str> - ID of secret providing qcow AES key or LUKS passphrase encryption=<bool (on/off)> - Encrypt the image with format 'aes'. 
(Deprecated in favor of encrypt.format=aes) + extended_l2=<bool (on/off)> - Extended L2 tables extent_size_hint=<size> - Extent size hint for the image file, 0 to disable lazy_refcounts=<bool (on/off)> - Postpone refcount updates nocow=<bool (on/off)> - Turn off copy-on-write (valid only on btrfs) @@ -212,6 +220,7 @@ Supported options: encrypt.ivgen-hash-alg=<str> - Name of IV generator hash algorithm encrypt.key-secret=<str> - ID of secret providing qcow AES key or LUKS passphrase encryption=<bool (on/off)> - Encrypt the image with format 'aes'. (Deprecated in favor of encrypt.format=aes) + extended_l2=<bool (on/off)> - Extended L2 tables extent_size_hint=<size> - Extent size hint for the image file, 0 to disable lazy_refcounts=<bool (on/off)> - Postpone refcount updates nocow=<bool (on/off)> - Turn off copy-on-write (valid only on btrfs) @@ -237,6 +246,7 @@ Supported options: encrypt.ivgen-hash-alg=<str> - Name of IV generator hash algorithm encrypt.key-secret=<str> - ID of secret providing qcow AES key or LUKS passphrase encryption=<bool (on/off)> - Encrypt the image with format 'aes'. 
(Deprecated in favor of encrypt.format=aes) + extended_l2=<bool (on/off)> - Extended L2 tables extent_size_hint=<size> - Extent size hint for the image file, 0 to disable lazy_refcounts=<bool (on/off)> - Postpone refcount updates nocow=<bool (on/off)> - Turn off copy-on-write (valid only on btrfs) @@ -245,10 +255,10 @@ Supported options: size=<size> - Virtual disk size Testing: create -f qcow2 -u -o backing_file=TEST_DIR/t.qcow2,,help -F qcow2 TEST_DIR/t.qcow2 128M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=134217728 backing_file=TEST_DIR/t.qcow2,,help backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=134217728 backing_file=TEST_DIR/t.qcow2,,help backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 Testing: create -f qcow2 -u -o backing_file=TEST_DIR/t.qcow2,,? -F qcow2 TEST_DIR/t.qcow2 128M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=134217728 backing_file=TEST_DIR/t.qcow2,,? backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=134217728 backing_file=TEST_DIR/t.qcow2,,? backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 Testing: create -f qcow2 -o backing_file=TEST_DIR/t.qcow2, -o help TEST_DIR/t.qcow2 128M qemu-img: Invalid option list: backing_file=TEST_DIR/t.qcow2, @@ -277,6 +287,7 @@ Supported qcow2 options: encrypt.ivgen-hash-alg=<str> - Name of IV generator hash algorithm encrypt.key-secret=<str> - ID of secret providing qcow AES key or LUKS passphrase encryption=<bool (on/off)> - Encrypt the image with format 'aes'. 
(Deprecated in favor of encrypt.format=aes) + extended_l2=<bool (on/off)> - Extended L2 tables lazy_refcounts=<bool (on/off)> - Postpone refcount updates preallocation=<str> - Preallocation mode (allowed values: off, metadata, falloc, full) refcount_bits=<num> - Width of a reference count entry in bits @@ -298,7 +309,7 @@ qemu-img: Format driver 'bochs' does not support image creation === convert: Options specified more than once === Testing: create -f qcow2 TEST_DIR/t.qcow2 128M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=134217728 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=134217728 lazy_refcounts=off refcount_bits=16 Testing: convert -f foo -f qcow2 TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.base image: TEST_DIR/t.IMGFMT.base @@ -322,6 +333,7 @@ Format specific information: lazy refcounts: true refcount bits: 16 corrupt: false + extended l2: false Testing: convert -O qcow2 -o cluster_size=4k -o lazy_refcounts=on -o cluster_size=8k TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.base image: TEST_DIR/t.IMGFMT.base @@ -334,6 +346,7 @@ Format specific information: lazy refcounts: true refcount bits: 16 corrupt: false + extended l2: false Testing: convert -O qcow2 -o cluster_size=4k,cluster_size=8k TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.base image: TEST_DIR/t.IMGFMT.base @@ -361,6 +374,7 @@ Supported options: encrypt.ivgen-hash-alg=<str> - Name of IV generator hash algorithm encrypt.key-secret=<str> - ID of secret providing qcow AES key or LUKS passphrase encryption=<bool (on/off)> - Encrypt the image with format 'aes'. 
(Deprecated in favor of encrypt.format=aes) + extended_l2=<bool (on/off)> - Extended L2 tables extent_size_hint=<size> - Extent size hint for the image file, 0 to disable lazy_refcounts=<bool (on/off)> - Postpone refcount updates nocow=<bool (on/off)> - Turn off copy-on-write (valid only on btrfs) @@ -386,6 +400,7 @@ Supported options: encrypt.ivgen-hash-alg=<str> - Name of IV generator hash algorithm encrypt.key-secret=<str> - ID of secret providing qcow AES key or LUKS passphrase encryption=<bool (on/off)> - Encrypt the image with format 'aes'. (Deprecated in favor of encrypt.format=aes) + extended_l2=<bool (on/off)> - Extended L2 tables extent_size_hint=<size> - Extent size hint for the image file, 0 to disable lazy_refcounts=<bool (on/off)> - Postpone refcount updates nocow=<bool (on/off)> - Turn off copy-on-write (valid only on btrfs) @@ -411,6 +426,7 @@ Supported options: encrypt.ivgen-hash-alg=<str> - Name of IV generator hash algorithm encrypt.key-secret=<str> - ID of secret providing qcow AES key or LUKS passphrase encryption=<bool (on/off)> - Encrypt the image with format 'aes'. (Deprecated in favor of encrypt.format=aes) + extended_l2=<bool (on/off)> - Extended L2 tables extent_size_hint=<size> - Extent size hint for the image file, 0 to disable lazy_refcounts=<bool (on/off)> - Postpone refcount updates nocow=<bool (on/off)> - Turn off copy-on-write (valid only on btrfs) @@ -436,6 +452,7 @@ Supported options: encrypt.ivgen-hash-alg=<str> - Name of IV generator hash algorithm encrypt.key-secret=<str> - ID of secret providing qcow AES key or LUKS passphrase encryption=<bool (on/off)> - Encrypt the image with format 'aes'. 
(Deprecated in favor of encrypt.format=aes) + extended_l2=<bool (on/off)> - Extended L2 tables extent_size_hint=<size> - Extent size hint for the image file, 0 to disable lazy_refcounts=<bool (on/off)> - Postpone refcount updates nocow=<bool (on/off)> - Turn off copy-on-write (valid only on btrfs) @@ -461,6 +478,7 @@ Supported options: encrypt.ivgen-hash-alg=<str> - Name of IV generator hash algorithm encrypt.key-secret=<str> - ID of secret providing qcow AES key or LUKS passphrase encryption=<bool (on/off)> - Encrypt the image with format 'aes'. (Deprecated in favor of encrypt.format=aes) + extended_l2=<bool (on/off)> - Extended L2 tables extent_size_hint=<size> - Extent size hint for the image file, 0 to disable lazy_refcounts=<bool (on/off)> - Postpone refcount updates nocow=<bool (on/off)> - Turn off copy-on-write (valid only on btrfs) @@ -486,6 +504,7 @@ Supported options: encrypt.ivgen-hash-alg=<str> - Name of IV generator hash algorithm encrypt.key-secret=<str> - ID of secret providing qcow AES key or LUKS passphrase encryption=<bool (on/off)> - Encrypt the image with format 'aes'. (Deprecated in favor of encrypt.format=aes) + extended_l2=<bool (on/off)> - Extended L2 tables extent_size_hint=<size> - Extent size hint for the image file, 0 to disable lazy_refcounts=<bool (on/off)> - Postpone refcount updates nocow=<bool (on/off)> - Turn off copy-on-write (valid only on btrfs) @@ -511,6 +530,7 @@ Supported options: encrypt.ivgen-hash-alg=<str> - Name of IV generator hash algorithm encrypt.key-secret=<str> - ID of secret providing qcow AES key or LUKS passphrase encryption=<bool (on/off)> - Encrypt the image with format 'aes'. 
(Deprecated in favor of encrypt.format=aes) + extended_l2=<bool (on/off)> - Extended L2 tables extent_size_hint=<size> - Extent size hint for the image file, 0 to disable lazy_refcounts=<bool (on/off)> - Postpone refcount updates nocow=<bool (on/off)> - Turn off copy-on-write (valid only on btrfs) @@ -536,6 +556,7 @@ Supported options: encrypt.ivgen-hash-alg=<str> - Name of IV generator hash algorithm encrypt.key-secret=<str> - ID of secret providing qcow AES key or LUKS passphrase encryption=<bool (on/off)> - Encrypt the image with format 'aes'. (Deprecated in favor of encrypt.format=aes) + extended_l2=<bool (on/off)> - Extended L2 tables extent_size_hint=<size> - Extent size hint for the image file, 0 to disable lazy_refcounts=<bool (on/off)> - Postpone refcount updates nocow=<bool (on/off)> - Turn off copy-on-write (valid only on btrfs) @@ -576,6 +597,7 @@ Supported qcow2 options: encrypt.ivgen-hash-alg=<str> - Name of IV generator hash algorithm encrypt.key-secret=<str> - ID of secret providing qcow AES key or LUKS passphrase encryption=<bool (on/off)> - Encrypt the image with format 'aes'. 
(Deprecated in favor of encrypt.format=aes) + extended_l2=<bool (on/off)> - Extended L2 tables lazy_refcounts=<bool (on/off)> - Postpone refcount updates preallocation=<str> - Preallocation mode (allowed values: off, metadata, falloc, full) refcount_bits=<num> - Width of a reference count entry in bits @@ -621,6 +643,7 @@ Format specific information: lazy refcounts: true refcount bits: 16 corrupt: false + extended l2: false Testing: amend -f qcow2 -o size=130M -o lazy_refcounts=off TEST_DIR/t.qcow2 image: TEST_DIR/t.IMGFMT @@ -633,6 +656,7 @@ Format specific information: lazy refcounts: false refcount bits: 16 corrupt: false + extended l2: false Testing: amend -f qcow2 -o size=8M -o lazy_refcounts=on -o size=132M TEST_DIR/t.qcow2 image: TEST_DIR/t.IMGFMT @@ -645,6 +669,7 @@ Format specific information: lazy refcounts: true refcount bits: 16 corrupt: false + extended l2: false Testing: amend -f qcow2 -o size=4M,size=148M TEST_DIR/t.qcow2 image: TEST_DIR/t.IMGFMT diff --git a/tests/qemu-iotests/085.out b/tests/qemu-iotests/085.out index 700658d5a3..7fc44b1c61 100644 --- a/tests/qemu-iotests/085.out +++ b/tests/qemu-iotests/085.out @@ -13,7 +13,7 @@ Formatting 'TEST_DIR/t.IMGFMT.2', fmt=IMGFMT size=134217728 === Create a single snapshot on virtio0 === { 'execute': 'blockdev-snapshot-sync', 'arguments': { 'device': 'virtio0', 'snapshot-file':'TEST_DIR/1-snapshot-v0.IMGFMT', 'format': 'IMGFMT' } } -Formatting 'TEST_DIR/1-snapshot-v0.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=134217728 backing_file=TEST_DIR/t.qcow2.1 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/1-snapshot-v0.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=134217728 backing_file=TEST_DIR/t.qcow2.1 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 {"return": {}} === Invalid command - missing device and nodename === @@ -30,40 +30,40 @@ Formatting 'TEST_DIR/1-snapshot-v0.qcow2', fmt=qcow2 cluster_size=65536 compress === 
Create several transactional group snapshots === { 'execute': 'transaction', 'arguments': {'actions': [ { 'type': 'blockdev-snapshot-sync', 'data' : { 'device': 'virtio0', 'snapshot-file': 'TEST_DIR/2-snapshot-v0.IMGFMT' } }, { 'type': 'blockdev-snapshot-sync', 'data' : { 'device': 'virtio1', 'snapshot-file': 'TEST_DIR/2-snapshot-v1.IMGFMT' } } ] } } -Formatting 'TEST_DIR/2-snapshot-v0.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=134217728 backing_file=TEST_DIR/1-snapshot-v0.qcow2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 -Formatting 'TEST_DIR/2-snapshot-v1.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=134217728 backing_file=TEST_DIR/t.qcow2.2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/2-snapshot-v0.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=134217728 backing_file=TEST_DIR/1-snapshot-v0.qcow2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/2-snapshot-v1.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=134217728 backing_file=TEST_DIR/t.qcow2.2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 {"return": {}} { 'execute': 'transaction', 'arguments': {'actions': [ { 'type': 'blockdev-snapshot-sync', 'data' : { 'device': 'virtio0', 'snapshot-file': 'TEST_DIR/3-snapshot-v0.IMGFMT' } }, { 'type': 'blockdev-snapshot-sync', 'data' : { 'device': 'virtio1', 'snapshot-file': 'TEST_DIR/3-snapshot-v1.IMGFMT' } } ] } } -Formatting 'TEST_DIR/3-snapshot-v0.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=134217728 backing_file=TEST_DIR/2-snapshot-v0.qcow2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 -Formatting 'TEST_DIR/3-snapshot-v1.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=134217728 backing_file=TEST_DIR/2-snapshot-v1.qcow2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/3-snapshot-v0.qcow2', fmt=qcow2 cluster_size=65536 
extended_l2=off compression_type=zlib size=134217728 backing_file=TEST_DIR/2-snapshot-v0.qcow2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/3-snapshot-v1.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=134217728 backing_file=TEST_DIR/2-snapshot-v1.qcow2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 {"return": {}} { 'execute': 'transaction', 'arguments': {'actions': [ { 'type': 'blockdev-snapshot-sync', 'data' : { 'device': 'virtio0', 'snapshot-file': 'TEST_DIR/4-snapshot-v0.IMGFMT' } }, { 'type': 'blockdev-snapshot-sync', 'data' : { 'device': 'virtio1', 'snapshot-file': 'TEST_DIR/4-snapshot-v1.IMGFMT' } } ] } } -Formatting 'TEST_DIR/4-snapshot-v0.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=134217728 backing_file=TEST_DIR/3-snapshot-v0.qcow2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 -Formatting 'TEST_DIR/4-snapshot-v1.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=134217728 backing_file=TEST_DIR/3-snapshot-v1.qcow2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/4-snapshot-v0.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=134217728 backing_file=TEST_DIR/3-snapshot-v0.qcow2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/4-snapshot-v1.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=134217728 backing_file=TEST_DIR/3-snapshot-v1.qcow2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 {"return": {}} { 'execute': 'transaction', 'arguments': {'actions': [ { 'type': 'blockdev-snapshot-sync', 'data' : { 'device': 'virtio0', 'snapshot-file': 'TEST_DIR/5-snapshot-v0.IMGFMT' } }, { 'type': 'blockdev-snapshot-sync', 'data' : { 'device': 'virtio1', 'snapshot-file': 'TEST_DIR/5-snapshot-v1.IMGFMT' } } ] } } -Formatting 'TEST_DIR/5-snapshot-v0.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=134217728 
backing_file=TEST_DIR/4-snapshot-v0.qcow2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 -Formatting 'TEST_DIR/5-snapshot-v1.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=134217728 backing_file=TEST_DIR/4-snapshot-v1.qcow2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/5-snapshot-v0.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=134217728 backing_file=TEST_DIR/4-snapshot-v0.qcow2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/5-snapshot-v1.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=134217728 backing_file=TEST_DIR/4-snapshot-v1.qcow2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 {"return": {}} { 'execute': 'transaction', 'arguments': {'actions': [ { 'type': 'blockdev-snapshot-sync', 'data' : { 'device': 'virtio0', 'snapshot-file': 'TEST_DIR/6-snapshot-v0.IMGFMT' } }, { 'type': 'blockdev-snapshot-sync', 'data' : { 'device': 'virtio1', 'snapshot-file': 'TEST_DIR/6-snapshot-v1.IMGFMT' } } ] } } -Formatting 'TEST_DIR/6-snapshot-v0.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=134217728 backing_file=TEST_DIR/5-snapshot-v0.qcow2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 -Formatting 'TEST_DIR/6-snapshot-v1.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=134217728 backing_file=TEST_DIR/5-snapshot-v1.qcow2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/6-snapshot-v0.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=134217728 backing_file=TEST_DIR/5-snapshot-v0.qcow2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/6-snapshot-v1.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=134217728 backing_file=TEST_DIR/5-snapshot-v1.qcow2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 {"return": {}} { 'execute': 'transaction', 'arguments': {'actions': [ { 
'type': 'blockdev-snapshot-sync', 'data' : { 'device': 'virtio0', 'snapshot-file': 'TEST_DIR/7-snapshot-v0.IMGFMT' } }, { 'type': 'blockdev-snapshot-sync', 'data' : { 'device': 'virtio1', 'snapshot-file': 'TEST_DIR/7-snapshot-v1.IMGFMT' } } ] } } -Formatting 'TEST_DIR/7-snapshot-v0.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=134217728 backing_file=TEST_DIR/6-snapshot-v0.qcow2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 -Formatting 'TEST_DIR/7-snapshot-v1.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=134217728 backing_file=TEST_DIR/6-snapshot-v1.qcow2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/7-snapshot-v0.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=134217728 backing_file=TEST_DIR/6-snapshot-v0.qcow2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/7-snapshot-v1.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=134217728 backing_file=TEST_DIR/6-snapshot-v1.qcow2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 {"return": {}} { 'execute': 'transaction', 'arguments': {'actions': [ { 'type': 'blockdev-snapshot-sync', 'data' : { 'device': 'virtio0', 'snapshot-file': 'TEST_DIR/8-snapshot-v0.IMGFMT' } }, { 'type': 'blockdev-snapshot-sync', 'data' : { 'device': 'virtio1', 'snapshot-file': 'TEST_DIR/8-snapshot-v1.IMGFMT' } } ] } } -Formatting 'TEST_DIR/8-snapshot-v0.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=134217728 backing_file=TEST_DIR/7-snapshot-v0.qcow2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 -Formatting 'TEST_DIR/8-snapshot-v1.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=134217728 backing_file=TEST_DIR/7-snapshot-v1.qcow2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/8-snapshot-v0.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=134217728 
backing_file=TEST_DIR/7-snapshot-v0.qcow2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/8-snapshot-v1.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=134217728 backing_file=TEST_DIR/7-snapshot-v1.qcow2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 {"return": {}} { 'execute': 'transaction', 'arguments': {'actions': [ { 'type': 'blockdev-snapshot-sync', 'data' : { 'device': 'virtio0', 'snapshot-file': 'TEST_DIR/9-snapshot-v0.IMGFMT' } }, { 'type': 'blockdev-snapshot-sync', 'data' : { 'device': 'virtio1', 'snapshot-file': 'TEST_DIR/9-snapshot-v1.IMGFMT' } } ] } } -Formatting 'TEST_DIR/9-snapshot-v0.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=134217728 backing_file=TEST_DIR/8-snapshot-v0.qcow2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 -Formatting 'TEST_DIR/9-snapshot-v1.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=134217728 backing_file=TEST_DIR/8-snapshot-v1.qcow2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/9-snapshot-v0.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=134217728 backing_file=TEST_DIR/8-snapshot-v0.qcow2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/9-snapshot-v1.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=134217728 backing_file=TEST_DIR/8-snapshot-v1.qcow2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 {"return": {}} { 'execute': 'transaction', 'arguments': {'actions': [ { 'type': 'blockdev-snapshot-sync', 'data' : { 'device': 'virtio0', 'snapshot-file': 'TEST_DIR/10-snapshot-v0.IMGFMT' } }, { 'type': 'blockdev-snapshot-sync', 'data' : { 'device': 'virtio1', 'snapshot-file': 'TEST_DIR/10-snapshot-v1.IMGFMT' } } ] } } -Formatting 'TEST_DIR/10-snapshot-v0.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=134217728 backing_file=TEST_DIR/9-snapshot-v0.qcow2 backing_fmt=qcow2 
lazy_refcounts=off refcount_bits=16 -Formatting 'TEST_DIR/10-snapshot-v1.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=134217728 backing_file=TEST_DIR/9-snapshot-v1.qcow2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/10-snapshot-v0.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=134217728 backing_file=TEST_DIR/9-snapshot-v0.qcow2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/10-snapshot-v1.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=134217728 backing_file=TEST_DIR/9-snapshot-v1.qcow2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 {"return": {}} === Create a couple of snapshots using blockdev-snapshot === diff --git a/tests/qemu-iotests/144.out b/tests/qemu-iotests/144.out index a2172a1308..13e0c4f5a7 100644 --- a/tests/qemu-iotests/144.out +++ b/tests/qemu-iotests/144.out @@ -9,7 +9,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=536870912 { 'execute': 'qmp_capabilities' } {"return": {}} { 'execute': 'blockdev-snapshot-sync', 'arguments': { 'device': 'virtio0', 'snapshot-file':'TEST_DIR/tmp.IMGFMT', 'format': 'IMGFMT' } } -Formatting 'TEST_DIR/tmp.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=536870912 backing_file=TEST_DIR/t.qcow2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/tmp.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=536870912 backing_file=TEST_DIR/t.qcow2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 {"return": {}} === Performing block-commit on active layer === @@ -31,6 +31,6 @@ Formatting 'TEST_DIR/tmp.qcow2', fmt=qcow2 cluster_size=65536 compression_type=z === Performing Live Snapshot 2 === { 'execute': 'blockdev-snapshot-sync', 'arguments': { 'device': 'virtio0', 'snapshot-file':'TEST_DIR/tmp2.IMGFMT', 'format': 'IMGFMT' } } -Formatting 'TEST_DIR/tmp2.qcow2', fmt=qcow2 cluster_size=65536 
compression_type=zlib size=536870912 backing_file=TEST_DIR/t.qcow2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/tmp2.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=536870912 backing_file=TEST_DIR/t.qcow2 backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 {"return": {}} *** done diff --git a/tests/qemu-iotests/182.out b/tests/qemu-iotests/182.out index 29e9db3497..ce23340670 100644 --- a/tests/qemu-iotests/182.out +++ b/tests/qemu-iotests/182.out @@ -13,7 +13,7 @@ Is another process using the image [TEST_DIR/t.qcow2]? {'execute': 'blockdev-add', 'arguments': { 'node-name': 'node0', 'driver': 'file', 'filename': 'TEST_DIR/t.IMGFMT', 'locking': 'on' } } {"return": {}} {'execute': 'blockdev-snapshot-sync', 'arguments': { 'node-name': 'node0', 'snapshot-file': 'TEST_DIR/t.IMGFMT.overlay', 'snapshot-node-name': 'node1' } } -Formatting 'TEST_DIR/t.qcow2.overlay', fmt=qcow2 cluster_size=65536 compression_type=zlib size=197120 backing_file=TEST_DIR/t.qcow2 backing_fmt=file lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2.overlay', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=197120 backing_file=TEST_DIR/t.qcow2 backing_fmt=file lazy_refcounts=off refcount_bits=16 {"return": {}} {'execute': 'blockdev-add', 'arguments': { 'node-name': 'node1', 'driver': 'file', 'filename': 'TEST_DIR/t.IMGFMT', 'locking': 'on' } } {"return": {}} diff --git a/tests/qemu-iotests/185.out b/tests/qemu-iotests/185.out index 62d1ab74d3..339438ac68 100644 --- a/tests/qemu-iotests/185.out +++ b/tests/qemu-iotests/185.out @@ -9,14 +9,14 @@ Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864 === Creating backing chain === { 'execute': 'blockdev-snapshot-sync', 'arguments': { 'device': 'disk', 'snapshot-file': 'TEST_DIR/t.IMGFMT.mid', 'format': 'IMGFMT', 'mode': 'absolute-paths' } } -Formatting 'TEST_DIR/t.qcow2.mid', fmt=qcow2 cluster_size=65536 compression_type=zlib size=67108864 
backing_file=TEST_DIR/t.qcow2.base backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2.mid', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 backing_file=TEST_DIR/t.qcow2.base backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 {"return": {}} { 'execute': 'human-monitor-command', 'arguments': { 'command-line': 'qemu-io disk "write 0 4M"' } } wrote 4194304/4194304 bytes at offset 0 4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) {"return": ""} { 'execute': 'blockdev-snapshot-sync', 'arguments': { 'device': 'disk', 'snapshot-file': 'TEST_DIR/t.IMGFMT', 'format': 'IMGFMT', 'mode': 'absolute-paths' } } -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=67108864 backing_file=TEST_DIR/t.qcow2.mid backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 backing_file=TEST_DIR/t.qcow2.mid backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 {"return": {}} === Start commit job and exit qemu === @@ -48,7 +48,7 @@ Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zli { 'execute': 'qmp_capabilities' } {"return": {}} { 'execute': 'drive-mirror', 'arguments': { 'device': 'disk', 'target': 'TEST_DIR/t.IMGFMT.copy', 'format': 'IMGFMT', 'sync': 'full', 'speed': 65536 } } -Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 cluster_size=65536 compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "disk"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} 
{"return": {}} @@ -62,7 +62,7 @@ Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 cluster_size=65536 compression_typ { 'execute': 'qmp_capabilities' } {"return": {}} { 'execute': 'drive-backup', 'arguments': { 'device': 'disk', 'target': 'TEST_DIR/t.IMGFMT.copy', 'format': 'IMGFMT', 'sync': 'full', 'speed': 65536 } } -Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 cluster_size=65536 compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "disk"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} diff --git a/tests/qemu-iotests/198 b/tests/qemu-iotests/198 index 1b9bfb827b..46f0c54537 100755 --- a/tests/qemu-iotests/198 +++ b/tests/qemu-iotests/198 @@ -94,6 +94,7 @@ echo "== checking image base ==" $QEMU_IMG info --image-opts $IMGSPECBASE | _filter_img_info --format-specific \ | sed -e "/^disk size:/ D" -e '/refcount bits:/ D' -e '/compat:/ D' \ -e '/lazy refcounts:/ D' -e '/corrupt:/ D' -e '/^\s*data file/ D' \ + -e '/extended l2:/ D' \ | _filter_json_filename echo @@ -101,6 +102,7 @@ echo "== checking image layer ==" $QEMU_IMG info --image-opts $IMGSPECLAYER | _filter_img_info --format-specific \ | sed -e "/^disk size:/ D" -e '/refcount bits:/ D' -e '/compat:/ D' \ -e '/lazy refcounts:/ D' -e '/corrupt:/ D' -e '/^\s*data file/ D' \ + -e '/extended l2:/ D' \ | _filter_json_filename diff --git a/tests/qemu-iotests/206.out b/tests/qemu-iotests/206.out index 1a14255a83..a100849fcb 100644 --- a/tests/qemu-iotests/206.out +++ b/tests/qemu-iotests/206.out 
@@ -22,6 +22,7 @@ Format specific information: lazy refcounts: false refcount bits: 16 corrupt: false + extended l2: false === Successful image creation (inline blockdev-add, explicit defaults) === @@ -45,6 +46,7 @@ Format specific information: lazy refcounts: false refcount bits: 16 corrupt: false + extended l2: false === Successful image creation (v3 non-default options) === @@ -68,6 +70,7 @@ Format specific information: lazy refcounts: true refcount bits: 1 corrupt: false + extended l2: false === Successful image creation (v2 non-default options) === @@ -146,6 +149,7 @@ Format specific information: payload offset: 528384 master key iters: XXX corrupt: false + extended l2: false === Invalid BlockdevRef === @@ -199,7 +203,7 @@ Job failed: Different refcount widths than 16 bits require compatibility level 1 === Invalid backing file options === {"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"backing-file": "/dev/null", "driver": "qcow2", "file": "node0", "preallocation": "full", "size": 67108864}}} {"return": {}} -Job failed: Backing file and preallocation cannot be used at the same time +Job failed: Backing file and preallocation can only be used at the same time if extended_l2 is on {"execute": "job-dismiss", "arguments": {"id": "job0"}} {"return": {}} diff --git a/tests/qemu-iotests/242.out b/tests/qemu-iotests/242.out index 091b9126ce..3759c99284 100644 --- a/tests/qemu-iotests/242.out +++ b/tests/qemu-iotests/242.out @@ -16,6 +16,7 @@ Format specific information: lazy refcounts: false refcount bits: 16 corrupt: false + extended l2: false No bitmap in JSON format output @@ -42,6 +43,7 @@ Format specific information: granularity: 32768 refcount bits: 16 corrupt: false + extended l2: false The same bitmaps in JSON format: [ @@ -80,6 +82,7 @@ Format specific information: granularity: 65536 refcount bits: 16 corrupt: false + extended l2: false The same bitmaps in JSON format: [ @@ -123,6 +126,7 @@ Format specific information: granularity: 
65536 refcount bits: 16 corrupt: false + extended l2: false The same bitmaps in JSON format: [ @@ -167,5 +171,6 @@ Format specific information: granularity: 16384 refcount bits: 16 corrupt: false + extended l2: false Test complete diff --git a/tests/qemu-iotests/255.out b/tests/qemu-iotests/255.out index d74903db99..33b7f22de3 100644 --- a/tests/qemu-iotests/255.out +++ b/tests/qemu-iotests/255.out @@ -3,9 +3,9 @@ Finishing a commit job with background reads === Create backing chain and start VM === -Formatting 'TEST_DIR/PID-t.qcow2.mid', fmt=qcow2 cluster_size=65536 compression_type=zlib size=134217728 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/PID-t.qcow2.mid', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=134217728 lazy_refcounts=off refcount_bits=16 -Formatting 'TEST_DIR/PID-t.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=134217728 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/PID-t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=134217728 lazy_refcounts=off refcount_bits=16 === Start background read requests === @@ -23,9 +23,9 @@ Closing the VM while a job is being cancelled === Create images and start VM === -Formatting 'TEST_DIR/PID-src.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=134217728 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/PID-src.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=134217728 lazy_refcounts=off refcount_bits=16 -Formatting 'TEST_DIR/PID-dst.qcow2', fmt=qcow2 cluster_size=65536 compression_type=zlib size=134217728 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/PID-dst.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=134217728 lazy_refcounts=off refcount_bits=16 wrote 1048576/1048576 bytes at offset 0 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) diff --git a/tests/qemu-iotests/271 b/tests/qemu-iotests/271 new file mode 
100755 index 0000000000..e242b28b58 --- /dev/null +++ b/tests/qemu-iotests/271 @@ -0,0 +1,901 @@ +#!/usr/bin/env bash +# +# Test qcow2 images with extended L2 entries +# +# Copyright (C) 2019-2020 Igalia, S.L. +# Author: Alberto Garcia <berto@igalia.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +# creator +owner=berto@igalia.com + +seq="$(basename $0)" +echo "QA output created by $seq" + +here="$PWD" +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img + rm -f "$TEST_IMG.raw" +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. 
./common.filter + +_supported_fmt qcow2 +_supported_proto file nfs +_supported_os Linux +_unsupported_imgopts extended_l2 compat=0.10 cluster_size data_file refcount_bits=1[^0-9] + +l2_offset=$((0x40000)) + +_verify_img() +{ + $QEMU_IMG compare "$TEST_IMG" "$TEST_IMG.raw" | grep -v 'Images are identical' + $QEMU_IMG check "$TEST_IMG" | _filter_qemu_img_check | \ + grep -v 'No errors were found on the image' +} + +# Compare the bitmap of an extended L2 entry against an expected value +_verify_l2_bitmap() +{ + entry_no="$1" # L2 entry number, starting from 0 + expected_alloc="$alloc" # Space-separated list of allocated subcluster indexes + expected_zero="$zero" # Space-separated list of zero subcluster indexes + + offset=$(($l2_offset + $entry_no * 16)) + entry=$(peek_file_be "$TEST_IMG" $offset 8) + offset=$(($offset + 8)) + bitmap=$(peek_file_be "$TEST_IMG" $offset 8) + + expected_bitmap=0 + for bit in $expected_alloc; do + expected_bitmap=$(($expected_bitmap | (1 << $bit))) + done + for bit in $expected_zero; do + expected_bitmap=$(($expected_bitmap | (1 << (32 + $bit)))) + done + printf -v expected_bitmap "%u" $expected_bitmap # Convert to unsigned + + printf "L2 entry #%d: 0x%016x %016x\n" "$entry_no" "$entry" "$bitmap" + if [ "$bitmap" != "$expected_bitmap" ]; then + printf "ERROR: expecting bitmap 0x%016x\n" "$expected_bitmap" + fi +} + +# This should be called as _run_test c=XXX sc=XXX off=XXX len=XXX cmd=XXX +# c: cluster number (0 if unset) +# sc: subcluster number inside cluster @c (0 if unset) +# off: offset inside subcluster @sc, in kilobytes (0 if unset) +# len: request length, passed directly to qemu-io (e.g: 256, 4k, 1M, ...) 
+# cmd: the command to pass to qemu-io, must be one of +# write -> write +# zero -> write -z +# unmap -> write -z -u +# compress -> write -c +# discard -> discard +_run_test() +{ + unset c sc off len cmd + for var in "$@"; do eval "$var"; done + case "${cmd:-write}" in + zero) + cmd="write -q -z";; + unmap) + cmd="write -q -z -u";; + compress) + pat=$((${pat:-0} + 1)) + cmd="write -q -c -P ${pat}";; + write) + pat=$((${pat:-0} + 1)) + cmd="write -q -P ${pat}";; + discard) + cmd="discard -q";; + *) + echo "Unknown option $cmd" + exit 1;; + esac + c="${c:-0}" + sc="${sc:-0}" + off="${off:-0}" + offset="$(($c * 64 + $sc * 2 + $off))" + [ "$offset" != 0 ] && offset="${offset}k" + cmd="$cmd ${offset} ${len}" + raw_cmd=$(echo $cmd | sed s/-c//) # Raw images don't support -c + echo $cmd | sed 's/-P [0-9][0-9]\?/-P PATTERN/' + $QEMU_IO -c "$cmd" "$TEST_IMG" | _filter_qemu_io + $QEMU_IO -c "$raw_cmd" -f raw "$TEST_IMG.raw" | _filter_qemu_io + _verify_img + _verify_l2_bitmap "$c" +} + +_reset_img() +{ + size="$1" + $QEMU_IMG create -f raw "$TEST_IMG.raw" "$size" | _filter_img_create + if [ "$use_backing_file" = "yes" ]; then + $QEMU_IMG create -f raw "$TEST_IMG.base" "$size" | _filter_img_create + $QEMU_IO -c "write -q -P 0xFF 0 $size" -f raw "$TEST_IMG.base" | _filter_qemu_io + $QEMU_IO -c "write -q -P 0xFF 0 $size" -f raw "$TEST_IMG.raw" | _filter_qemu_io + _make_test_img -o extended_l2=on -F raw -b "$TEST_IMG.base" "$size" + else + _make_test_img -o extended_l2=on "$size" + fi +} + +############################################################ +############################################################ +############################################################ + +# Test that writing to an image with subclusters produces the expected +# results, in images with and without backing files +for use_backing_file in yes no; do + echo + echo "### Standard write tests (backing file: $use_backing_file) ###" + echo + _reset_img 1M + ### Write subcluster #0 (beginning of 
subcluster) ### + alloc="0"; zero="" + _run_test sc=0 len=1k + + ### Write subcluster #1 (middle of subcluster) ### + alloc="0 1"; zero="" + _run_test sc=1 off=1 len=512 + + ### Write subcluster #2 (end of subcluster) ### + alloc="0 1 2"; zero="" + _run_test sc=2 off=1 len=1k + + ### Write subcluster #3 (full subcluster) ### + alloc="0 1 2 3"; zero="" + _run_test sc=3 len=2k + + ### Write subclusters #4-6 (full subclusters) ### + alloc="$(seq 0 6)"; zero="" + _run_test sc=4 len=6k + + ### Write subclusters #7-9 (partial subclusters) ### + alloc="$(seq 0 9)"; zero="" + _run_test sc=7 off=1 len=4k + + ### Write subcluster #16 (partial subcluster) ### + alloc="$(seq 0 9) 16"; zero="" + _run_test sc=16 len=1k + + ### Write subcluster #31-#33 (cluster overlap) ### + alloc="$(seq 0 9) 16 31"; zero="" + _run_test sc=31 off=1 len=4k + alloc="0 1" ; zero="" + _verify_l2_bitmap 1 + + ### Zero subcluster #1 + alloc="0 $(seq 2 9) 16 31"; zero="1" + _run_test sc=1 len=2k cmd=zero + + ### Zero cluster #0 + alloc=""; zero="$(seq 0 31)" + _run_test sc=0 len=64k cmd=zero + + ### Fill cluster #0 with data + alloc="$(seq 0 31)"; zero="" + _run_test sc=0 len=64k + + ### Zero and unmap half of cluster #0 (this won't unmap it) + alloc="$(seq 16 31)"; zero="$(seq 0 15)" + _run_test sc=0 len=32k cmd=unmap + + ### Zero and unmap cluster #0 + alloc=""; zero="$(seq 0 31)" + _run_test sc=0 len=64k cmd=unmap + + ### Write subcluster #1 (middle of subcluster) + alloc="1"; zero="0 $(seq 2 31)" + _run_test sc=1 off=1 len=512 + + ### Fill cluster #0 with data + alloc="$(seq 0 31)"; zero="" + _run_test sc=0 len=64k + + ### Discard cluster #0 + alloc=""; zero="$(seq 0 31)" + _run_test sc=0 len=64k cmd=discard + + ### Write compressed data to cluster #0 + alloc=""; zero="" + _run_test sc=0 len=64k cmd=compress + + ### Write subcluster #1 (middle of subcluster) + alloc="$(seq 0 31)"; zero="" + _run_test sc=1 off=1 len=512 +done + +############################################################ 
+############################################################ +############################################################ + +# calculate_l2_meta() checks if none of the clusters affected by a +# write operation need COW or changes to their L2 metadata and simply +# returns when they don't. This is a test for that optimization. +# Here clusters #0-#3 are overwritten but only #1 and #2 need changes. +echo +echo '### Overwriting several clusters without COW ###' +echo +use_backing_file="no" _reset_img 1M +# Write cluster #0, subclusters #12-#31 +alloc="$(seq 12 31)"; zero="" +_run_test sc=12 len=40k + +# Write cluster #1, subcluster #13 +alloc="13"; zero="" +_run_test c=1 sc=13 len=2k + +# Zeroize cluster #2, subcluster #14 +alloc="14"; zero="" +_run_test c=2 sc=14 len=2k +alloc=""; zero="14" +_run_test c=2 sc=14 len=2k cmd=zero + +# Write cluster #3, subclusters #0-#16 +alloc="$(seq 0 16)"; zero="" +_run_test c=3 sc=0 len=34k + +# Write from cluster #0, subcluster #12 to cluster #3, subcluster #11 +alloc="$(seq 12 31)"; zero="" +_run_test sc=12 len=192k +alloc="$(seq 0 31)"; zero="" +_verify_l2_bitmap 1 +_verify_l2_bitmap 2 + +alloc="$(seq 0 16)"; zero="" +_verify_l2_bitmap 3 + +############################################################ +############################################################ +############################################################ + +# Test different patterns of writing zeroes +for use_backing_file in yes no; do + echo + echo "### Writing zeroes 1: unallocated clusters (backing file: $use_backing_file) ###" + echo + # Note that the image size is not a multiple of the cluster size + _reset_img 2083k + + # Cluster-aligned request from clusters #0 to #2 + alloc=""; zero="$(seq 0 31)" + _run_test c=0 sc=0 len=192k cmd=zero + _verify_l2_bitmap 1 + _verify_l2_bitmap 2 + + # Subcluster-aligned request from clusters #3 to #5 + alloc=""; zero="$(seq 16 31)" + _run_test c=3 sc=16 len=128k cmd=zero + alloc=""; zero="$(seq 0 31)" + 
_verify_l2_bitmap 4 + alloc=""; zero="$(seq 0 15)" + _verify_l2_bitmap 5 + + # Unaligned request from clusters #6 to #8 + if [ "$use_backing_file" = "yes" ]; then + alloc="15"; zero="$(seq 16 31)" # copy-on-write happening here + else + alloc=""; zero="$(seq 15 31)" + fi + _run_test c=6 sc=15 off=1 len=128k cmd=zero + alloc=""; zero="$(seq 0 31)" + _verify_l2_bitmap 7 + if [ "$use_backing_file" = "yes" ]; then + alloc="15"; zero="$(seq 0 14)" # copy-on-write happening here + else + alloc=""; zero="$(seq 0 15)" + fi + _verify_l2_bitmap 8 + + echo + echo "### Writing zeroes 2: allocated clusters (backing file: $use_backing_file) ###" + echo + alloc="$(seq 0 31)"; zero="" + _run_test c=9 sc=0 len=576k + _verify_l2_bitmap 10 + _verify_l2_bitmap 11 + _verify_l2_bitmap 12 + _verify_l2_bitmap 13 + _verify_l2_bitmap 14 + _verify_l2_bitmap 15 + _verify_l2_bitmap 16 + _verify_l2_bitmap 17 + + # Cluster-aligned request from clusters #9 to #11 + alloc=""; zero="$(seq 0 31)" + _run_test c=9 sc=0 len=192k cmd=zero + _verify_l2_bitmap 10 + _verify_l2_bitmap 11 + + # Subcluster-aligned request from clusters #12 to #14 + alloc="$(seq 0 15)"; zero="$(seq 16 31)" + _run_test c=12 sc=16 len=128k cmd=zero + alloc=""; zero="$(seq 0 31)" + _verify_l2_bitmap 13 + alloc="$(seq 16 31)"; zero="$(seq 0 15)" + _verify_l2_bitmap 14 + + # Unaligned request from clusters #15 to #17 + alloc="$(seq 0 15)"; zero="$(seq 16 31)" + _run_test c=15 sc=15 off=1 len=128k cmd=zero + alloc=""; zero="$(seq 0 31)" + _verify_l2_bitmap 16 + alloc="$(seq 15 31)"; zero="$(seq 0 14)" + _verify_l2_bitmap 17 + + echo + echo "### Writing zeroes 3: compressed clusters (backing file: $use_backing_file) ###" + echo + alloc=""; zero="" + for c in $(seq 18 28); do + _run_test c=$c sc=0 len=64k cmd=compress + done + + # Cluster-aligned request from clusters #18 to #20 + alloc=""; zero="$(seq 0 31)" + _run_test c=18 sc=0 len=192k cmd=zero + _verify_l2_bitmap 19 + _verify_l2_bitmap 20 + + # Subcluster-aligned request from 
clusters #21 to #23. + # We cannot partially zero a compressed cluster so the code + # returns -ENOTSUP, which means copy-on-write of the compressed + # data and fill the rest with actual zeroes on disk. + # TODO: cluster #22 should use the 'all zeroes' bits. + alloc="$(seq 0 31)"; zero="" + _run_test c=21 sc=16 len=128k cmd=zero + _verify_l2_bitmap 22 + _verify_l2_bitmap 23 + + # Unaligned request from clusters #24 to #26 + # In this case QEMU internally sends a 1k request followed by a + # subcluster-aligned 128k request. The first request decompresses + # cluster #24, but that's not enough to perform the second request + # efficiently because it partially writes to cluster #26 (which is + # compressed) so we hit the same problem as before. + alloc="$(seq 0 31)"; zero="" + _run_test c=24 sc=15 off=1 len=129k cmd=zero + _verify_l2_bitmap 25 + _verify_l2_bitmap 26 + + # Unaligned request from clusters #27 to #29 + # Similar to the previous case, but this time the tail of the + # request does not correspond to a compressed cluster, so it can + # be zeroed efficiently. + # Note that the very last subcluster is partially written, so if + # there's a backing file we need to perform cow. + alloc="$(seq 0 15)"; zero="$(seq 16 31)" + _run_test c=27 sc=15 off=1 len=128k cmd=zero + alloc=""; zero="$(seq 0 31)" + _verify_l2_bitmap 28 + if [ "$use_backing_file" = "yes" ]; then + alloc="15"; zero="$(seq 0 14)" # copy-on-write happening here + else + alloc=""; zero="$(seq 0 15)" + fi + _verify_l2_bitmap 29 + + echo + echo "### Writing zeroes 4: other tests (backing file: $use_backing_file) ###" + echo + # Unaligned request in the middle of cluster #30. + # If there's a backing file we need to allocate and do + # copy-on-write on the partially zeroed subclusters. + # If not we can set the 'all zeroes' bit on them. 
+ if [ "$use_backing_file" = "yes" ]; then + alloc="15 19"; zero="$(seq 16 18)" # copy-on-write happening here + else + alloc=""; zero="$(seq 15 19)" + fi + _run_test c=30 sc=15 off=1 len=8k cmd=zero + + # Fill the last cluster with zeroes, up to the end of the image + # (the image size is not a multiple of the cluster or subcluster size). + alloc=""; zero="$(seq 0 17)" + _run_test c=32 sc=0 len=35k cmd=zero +done + +############################################################ +############################################################ +############################################################ + +# Zero + unmap +for use_backing_file in yes no; do + echo + echo "### Zero + unmap 1: allocated clusters (backing file: $use_backing_file) ###" + echo + # Note that the image size is not a multiple of the cluster size + _reset_img 2083k + alloc="$(seq 0 31)"; zero="" + _run_test c=9 sc=0 len=576k + _verify_l2_bitmap 10 + _verify_l2_bitmap 11 + _verify_l2_bitmap 12 + _verify_l2_bitmap 13 + _verify_l2_bitmap 14 + _verify_l2_bitmap 15 + _verify_l2_bitmap 16 + _verify_l2_bitmap 17 + + # Cluster-aligned request from clusters #9 to #11 + alloc=""; zero="$(seq 0 31)" + _run_test c=9 sc=0 len=192k cmd=unmap + _verify_l2_bitmap 10 + _verify_l2_bitmap 11 + + # Subcluster-aligned request from clusters #12 to #14 + alloc="$(seq 0 15)"; zero="$(seq 16 31)" + _run_test c=12 sc=16 len=128k cmd=unmap + alloc=""; zero="$(seq 0 31)" + _verify_l2_bitmap 13 + alloc="$(seq 16 31)"; zero="$(seq 0 15)" + _verify_l2_bitmap 14 + + # Unaligned request from clusters #15 to #17 + alloc="$(seq 0 15)"; zero="$(seq 16 31)" + _run_test c=15 sc=15 off=1 len=128k cmd=unmap + alloc=""; zero="$(seq 0 31)" + _verify_l2_bitmap 16 + alloc="$(seq 15 31)"; zero="$(seq 0 14)" + _verify_l2_bitmap 17 + + echo + echo "### Zero + unmap 2: compressed clusters (backing file: $use_backing_file) ###" + echo + alloc=""; zero="" + for c in $(seq 18 28); do + _run_test c=$c sc=0 len=64k cmd=compress + done + + # 
Cluster-aligned request from clusters #18 to #20 + alloc=""; zero="$(seq 0 31)" + _run_test c=18 sc=0 len=192k cmd=unmap + _verify_l2_bitmap 19 + _verify_l2_bitmap 20 + + # Subcluster-aligned request from clusters #21 to #23. + # We cannot partially zero a compressed cluster so the code + # returns -ENOTSUP, which means copy-on-write of the compressed + # data and fill the rest with actual zeroes on disk. + # TODO: cluster #22 should use the 'all zeroes' bits. + alloc="$(seq 0 31)"; zero="" + _run_test c=21 sc=16 len=128k cmd=unmap + _verify_l2_bitmap 22 + _verify_l2_bitmap 23 + + # Unaligned request from clusters #24 to #26 + # In this case QEMU internally sends a 1k request followed by a + # subcluster-aligned 128k request. The first request decompresses + # cluster #24, but that's not enough to perform the second request + # efficiently because it partially writes to cluster #26 (which is + # compressed) so we hit the same problem as before. + alloc="$(seq 0 31)"; zero="" + _run_test c=24 sc=15 off=1 len=129k cmd=unmap + _verify_l2_bitmap 25 + _verify_l2_bitmap 26 + + # Unaligned request from clusters #27 to #29 + # Similar to the previous case, but this time the tail of the + # request does not correspond to a compressed cluster, so it can + # be zeroed efficiently. + # Note that the very last subcluster is partially written, so if + # there's a backing file we need to perform cow. 
+ alloc="$(seq 0 15)"; zero="$(seq 16 31)" + _run_test c=27 sc=15 off=1 len=128k cmd=unmap + alloc=""; zero="$(seq 0 31)" + _verify_l2_bitmap 28 + if [ "$use_backing_file" = "yes" ]; then + alloc="15"; zero="$(seq 0 14)" # copy-on-write happening here + else + alloc=""; zero="$(seq 0 15)" + fi + _verify_l2_bitmap 29 +done + +############################################################ +############################################################ +############################################################ + +# Test qcow2_cluster_discard() with full and normal discards +for use_backing_file in yes no; do + echo + echo "### Discarding clusters with non-zero bitmaps (backing file: $use_backing_file) ###" + echo + if [ "$use_backing_file" = "yes" ]; then + _make_test_img -o extended_l2=on -F raw -b "$TEST_IMG.base" 1M + else + _make_test_img -o extended_l2=on 1M + fi + # Write clusters #0-#1 and then discard them + $QEMU_IO -c 'write -q 0 128k' "$TEST_IMG" + $QEMU_IO -c 'discard -q 0 128k' "$TEST_IMG" + # 'qemu-io discard' doesn't do a full discard, it zeroizes the + # cluster, so both clusters have all zero bits set now + alloc=""; zero="$(seq 0 31)" + _verify_l2_bitmap 0 + _verify_l2_bitmap 1 + # Now mark the 2nd half of the subclusters from cluster #0 as unallocated + poke_file "$TEST_IMG" $(($l2_offset+8)) "\x00\x00" + # Discard cluster #0 again to see how the zero bits have changed + $QEMU_IO -c 'discard -q 0 64k' "$TEST_IMG" + # And do a full discard of cluster #1 by shrinking and growing the image + $QEMU_IMG resize --shrink "$TEST_IMG" 64k + $QEMU_IMG resize "$TEST_IMG" 1M + # A normal discard sets all 'zero' bits only if the image has a + # backing file, otherwise it won't touch them. + if [ "$use_backing_file" = "yes" ]; then + alloc=""; zero="$(seq 0 31)" + else + alloc=""; zero="$(seq 0 15)" + fi + _verify_l2_bitmap 0 + # A full discard should clear the L2 entry completely.
However + # when growing an image with a backing file the new clusters are + # zeroized to hide the stale data from the backing file + if [ "$use_backing_file" = "yes" ]; then + alloc=""; zero="$(seq 0 31)" + else + alloc=""; zero="" + fi + _verify_l2_bitmap 1 +done + +############################################################ +############################################################ +############################################################ + +# Test that corrupted L2 entries are detected in both read and write +# operations +for corruption_test_cmd in read write; do + echo + echo "### Corrupted L2 entries - $corruption_test_cmd test (allocated) ###" + echo + echo "# 'cluster is zero' bit set on the standard cluster descriptor" + echo + # We actually don't consider this a corrupted image. + # The 'cluster is zero' bit is unused in extended L2 entries so + # QEMU ignores it. + # TODO: maybe treat the image as corrupted and make qemu-img check fix it? + _make_test_img -o extended_l2=on 1M + $QEMU_IO -c 'write -q -P 0x11 0 2k' "$TEST_IMG" + poke_file "$TEST_IMG" $(($l2_offset+7)) "\x01" + alloc="0"; zero="" + _verify_l2_bitmap 0 + $QEMU_IO -c "$corruption_test_cmd -q -P 0x11 0 1k" "$TEST_IMG" + if [ "$corruption_test_cmd" = "write" ]; then + alloc="0"; zero="" + fi + _verify_l2_bitmap 0 + + echo + echo "# Both 'subcluster is zero' and 'subcluster is allocated' bits set" + echo + _make_test_img -o extended_l2=on 1M + # Write from the middle of cluster #0 to the middle of cluster #2 + $QEMU_IO -c 'write -q 32k 128k' "$TEST_IMG" + # Corrupt the L2 entry from cluster #1 + poke_file_be "$TEST_IMG" $(($l2_offset+24)) 4 1 + alloc="$(seq 0 31)"; zero="0" + _verify_l2_bitmap 1 + $QEMU_IO -c "$corruption_test_cmd 0 192k" "$TEST_IMG" + + echo + echo "### Corrupted L2 entries - $corruption_test_cmd test (unallocated) ###" + echo + echo "# 'cluster is zero' bit set on the standard cluster descriptor" + echo + # We actually don't consider this a corrupted image. 
+ # The 'cluster is zero' bit is unused in extended L2 entries so + # QEMU ignores it. + # TODO: maybe treat the image as corrupted and make qemu-img check fix it? + _make_test_img -o extended_l2=on 1M + # We want to modify the (empty) L2 entry from cluster #0, + # but we write to #4 in order to initialize the L2 table first + $QEMU_IO -c 'write -q 256k 1k' "$TEST_IMG" + poke_file "$TEST_IMG" $(($l2_offset+7)) "\x01" + alloc=""; zero="" + _verify_l2_bitmap 0 + $QEMU_IO -c "$corruption_test_cmd -q 0 1k" "$TEST_IMG" + if [ "$corruption_test_cmd" = "write" ]; then + alloc="0"; zero="" + fi + _verify_l2_bitmap 0 + + echo + echo "# 'subcluster is allocated' bit set" + echo + _make_test_img -o extended_l2=on 1M + # We want to corrupt the (empty) L2 entry from cluster #0, + # but we write to #4 in order to initialize the L2 table first + $QEMU_IO -c 'write -q 256k 1k' "$TEST_IMG" + poke_file "$TEST_IMG" $(($l2_offset+15)) "\x01" + alloc="0"; zero="" + _verify_l2_bitmap 0 + $QEMU_IO -c "$corruption_test_cmd 0 1k" "$TEST_IMG" + + echo + echo "# Both 'subcluster is zero' and 'subcluster is allocated' bits set" + echo + _make_test_img -o extended_l2=on 1M + # We want to corrupt the (empty) L2 entry from cluster #1, + # but we write to #4 in order to initialize the L2 table first + $QEMU_IO -c 'write -q 256k 1k' "$TEST_IMG" + # Corrupt the L2 entry from cluster #1 + poke_file_be "$TEST_IMG" $(($l2_offset+24)) 8 $(((1 << 32) | 1)) + alloc="0"; zero="0" + _verify_l2_bitmap 1 + $QEMU_IO -c "$corruption_test_cmd 0 192k" "$TEST_IMG" + + echo + echo "### Compressed cluster with subcluster bitmap != 0 - $corruption_test_cmd test ###" + echo + # We actually don't consider this a corrupted image. + # The bitmap in compressed clusters is unused so QEMU should just ignore it. 
+ _make_test_img -o extended_l2=on 1M + $QEMU_IO -c 'write -q -P 11 -c 0 64k' "$TEST_IMG" + # Change the L2 bitmap to allocate subcluster #31 and zeroize subcluster #0 + poke_file "$TEST_IMG" $(($l2_offset+11)) "\x01\x80" + alloc="31"; zero="0" + _verify_l2_bitmap 0 + $QEMU_IO -c "$corruption_test_cmd -P 11 0 64k" "$TEST_IMG" | _filter_qemu_io + # Writing allocates a new uncompressed cluster so we get a new bitmap + if [ "$corruption_test_cmd" = "write" ]; then + alloc="$(seq 0 31)"; zero="" + fi + _verify_l2_bitmap 0 +done + +############################################################ +############################################################ +############################################################ + +echo +echo "### Detect and repair unaligned clusters ###" +echo +# Create a backing file and fill it with data +$QEMU_IMG create -f raw "$TEST_IMG.base" 128k | _filter_img_create +$QEMU_IO -c "write -q -P 0xff 0 128k" -f raw "$TEST_IMG.base" | _filter_qemu_io + +echo "# Corrupted L2 entry, allocated subcluster #" +# Create a new image, allocate a cluster and write some data to it +_make_test_img -o extended_l2=on -F raw -b "$TEST_IMG.base" +$QEMU_IO -c 'write -q -P 1 4k 2k' "$TEST_IMG" +# Corrupt the L2 entry by making the offset unaligned +poke_file "$TEST_IMG" "$(($l2_offset+6))" "\x02" +# This cannot be repaired, qemu-img check will fail to fix it +_check_test_img -r all +# Attempting to read the image will still show that it's corrupted +$QEMU_IO -c 'read -q 0 2k' "$TEST_IMG" + +echo "# Corrupted L2 entry, no allocated subclusters #" +# Create a new image, allocate a cluster and zeroize subcluster #2 +_make_test_img -o extended_l2=on -F raw -b "$TEST_IMG.base" +$QEMU_IO -c 'write -q -P 1 4k 2k' "$TEST_IMG" +$QEMU_IO -c 'write -q -z 4k 2k' "$TEST_IMG" +# Corrupt the L2 entry by making the offset unaligned +poke_file "$TEST_IMG" "$(($l2_offset+6))" "\x02" +# This time none of the subclusters are allocated so we can repair the image +_check_test_img -r all 
+# And the data can be read normally +$QEMU_IO -c 'read -q -P 0xff 0 4k' "$TEST_IMG" +$QEMU_IO -c 'read -q -P 0x00 4k 2k' "$TEST_IMG" +$QEMU_IO -c 'read -q -P 0xff 6k 122k' "$TEST_IMG" + +############################################################ +############################################################ +############################################################ + +echo +echo "### Image creation options ###" +echo +echo "# cluster_size < 16k" +_make_test_img -o extended_l2=on,cluster_size=8k 1M + +echo "# backing file and preallocation=metadata" +# For preallocation with backing files, create a backing file first +$QEMU_IMG create -f raw "$TEST_IMG.base" 1M | _filter_img_create +$QEMU_IO -c "write -q -P 0xff 0 1M" -f raw "$TEST_IMG.base" | _filter_qemu_io + +_make_test_img -o extended_l2=on,preallocation=metadata -F raw -b "$TEST_IMG.base" 512k +$QEMU_IMG resize "$TEST_IMG" 1M +$QEMU_IO -c 'read -P 0xff 0 512k' "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c 'read -P 0x00 512k 512k' "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG map "$TEST_IMG" | _filter_testdir + +echo "# backing file and preallocation=falloc" +_make_test_img -o extended_l2=on,preallocation=falloc -F raw -b "$TEST_IMG.base" 512k +$QEMU_IMG resize "$TEST_IMG" 1M +$QEMU_IO -c 'read -P 0xff 0 512k' "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c 'read -P 0x00 512k 512k' "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG map "$TEST_IMG" | _filter_testdir + +echo "# backing file and preallocation=full" +_make_test_img -o extended_l2=on,preallocation=full -F raw -b "$TEST_IMG.base" 512k +$QEMU_IMG resize "$TEST_IMG" 1M +$QEMU_IO -c 'read -P 0xff 0 512k' "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c 'read -P 0x00 512k 512k' "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG map "$TEST_IMG" | _filter_testdir + +echo +echo "### Image resizing with preallocation and backing files ###" +echo +# In this case the new subclusters must have the 'all zeroes' bit set +echo "# resize --preallocation=metadata" +_make_test_img -o extended_l2=on -F 
raw -b "$TEST_IMG.base" 503k +$QEMU_IMG resize --preallocation=metadata "$TEST_IMG" 1013k +$QEMU_IO -c 'read -P 0xff 0 503k' "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c 'read -P 0x00 503k 510k' "$TEST_IMG" | _filter_qemu_io + +# In this case and the next one the new subclusters must be allocated +echo "# resize --preallocation=falloc" +_make_test_img -o extended_l2=on -F raw -b "$TEST_IMG.base" 503k +$QEMU_IMG resize --preallocation=falloc "$TEST_IMG" 1013k +$QEMU_IO -c 'read -P 0xff 0 503k' "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c 'read -P 0x00 503k 510k' "$TEST_IMG" | _filter_qemu_io + +echo "# resize --preallocation=full" +_make_test_img -o extended_l2=on -F raw -b "$TEST_IMG.base" 503k +$QEMU_IMG resize --preallocation=full "$TEST_IMG" 1013k +$QEMU_IO -c 'read -P 0xff 0 503k' "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c 'read -P 0x00 503k 510k' "$TEST_IMG" | _filter_qemu_io + +echo +echo "### Image resizing with preallocation without backing files ###" +echo +# In this case the new subclusters must have the 'all zeroes' bit set +echo "# resize --preallocation=metadata" +_make_test_img -o extended_l2=on 503k +$QEMU_IO -c 'write -P 0xff 0 503k' "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG resize --preallocation=metadata "$TEST_IMG" 1013k +$QEMU_IO -c 'read -P 0xff 0 503k' "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c 'read -P 0x00 503k 510k' "$TEST_IMG" | _filter_qemu_io + +# In this case and the next one the new subclusters must be allocated +echo "# resize --preallocation=falloc" +_make_test_img -o extended_l2=on 503k +$QEMU_IO -c 'write -P 0xff 0 503k' "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG resize --preallocation=falloc "$TEST_IMG" 1013k +$QEMU_IO -c 'read -P 0xff 0 503k' "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c 'read -P 0x00 503k 510k' "$TEST_IMG" | _filter_qemu_io + +echo "# resize --preallocation=full" +_make_test_img -o extended_l2=on 503k +$QEMU_IO -c 'write -P 0xff 0 503k' "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG resize --preallocation=full "$TEST_IMG" 1013k 
+$QEMU_IO -c 'read -P 0xff 0 503k' "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c 'read -P 0x00 503k 510k' "$TEST_IMG" | _filter_qemu_io + +echo +echo "### qemu-img measure ###" +echo +echo "# 512MB, extended_l2=off" # This needs one L2 table +$QEMU_IMG measure --size 512M -O qcow2 -o extended_l2=off +echo "# 512MB, extended_l2=on" # This needs two L2 tables +$QEMU_IMG measure --size 512M -O qcow2 -o extended_l2=on + +echo "# 16K clusters, 64GB, extended_l2=off" # This needs one full L1 table cluster +$QEMU_IMG measure --size 64G -O qcow2 -o cluster_size=16k,extended_l2=off +echo "# 16K clusters, 64GB, extended_l2=on" # This needs two full L1 table clusters +$QEMU_IMG measure --size 64G -O qcow2 -o cluster_size=16k,extended_l2=on + +echo "# 8k clusters" # This should fail +$QEMU_IMG measure --size 1M -O qcow2 -o cluster_size=8k,extended_l2=on + +echo "# 1024 TB" # Maximum allowed size with extended_l2=on and 64K clusters +$QEMU_IMG measure --size 1024T -O qcow2 -o extended_l2=on +echo "# 1025 TB" # This should fail +$QEMU_IMG measure --size 1025T -O qcow2 -o extended_l2=on + +echo +echo "### qemu-img amend ###" +echo +_make_test_img -o extended_l2=on 1M +$QEMU_IMG amend -o extended_l2=off "$TEST_IMG" && echo "Unexpected pass" + +_make_test_img -o extended_l2=off 1M +$QEMU_IMG amend -o extended_l2=on "$TEST_IMG" && echo "Unexpected pass" + +echo +echo "### Test copy-on-write on an image with snapshots ###" +echo +_make_test_img -o extended_l2=on 1M + +# For each cluster from #0 to #9 this loop zeroes subcluster #7 +# and allocates subclusters #13 and #18. 
+alloc="13 18"; zero="7" +for c in $(seq 0 9); do + $QEMU_IO -c "write -q -z $((64*$c+14))k 2k" \ + -c "write -q -P $((0xd0+$c)) $((64*$c+26))k 2k" \ + -c "write -q -P $((0xe0+$c)) $((64*$c+36))k 2k" "$TEST_IMG" + _verify_l2_bitmap "$c" +done + +# Create a snapshot and set l2_offset to the new L2 table +$QEMU_IMG snapshot -c snap1 "$TEST_IMG" +l2_offset=$((0x110000)) + +# Write different patterns to each one of the clusters +# in order to see how copy-on-write behaves in each case. +$QEMU_IO -c "write -q -P 0xf0 $((64*0+30))k 1k" \ + -c "write -q -P 0xf1 $((64*1+20))k 1k" \ + -c "write -q -P 0xf2 $((64*2+40))k 1k" \ + -c "write -q -P 0xf3 $((64*3+26))k 1k" \ + -c "write -q -P 0xf4 $((64*4+14))k 1k" \ + -c "write -q -P 0xf5 $((64*5+1))k 1k" \ + -c "write -q -z $((64*6+30))k 3k" \ + -c "write -q -z $((64*7+26))k 2k" \ + -c "write -q -z $((64*8+26))k 1k" \ + -c "write -q -z $((64*9+12))k 1k" \ + "$TEST_IMG" +alloc="$(seq 13 18)"; zero="7" _verify_l2_bitmap 0 +alloc="$(seq 10 18)"; zero="7" _verify_l2_bitmap 1 +alloc="$(seq 13 20)"; zero="7" _verify_l2_bitmap 2 +alloc="$(seq 13 18)"; zero="7" _verify_l2_bitmap 3 +alloc="$(seq 7 18)"; zero="" _verify_l2_bitmap 4 +alloc="$(seq 0 18)"; zero="" _verify_l2_bitmap 5 +alloc="13 18"; zero="7 15 16" _verify_l2_bitmap 6 +alloc="18"; zero="7 13" _verify_l2_bitmap 7 +alloc="$(seq 13 18)"; zero="7" _verify_l2_bitmap 8 +alloc="13 18"; zero="6 7" _verify_l2_bitmap 9 + +echo +echo "### Test concurrent requests ###" +echo + +_concurrent_io() +{ +# Allocate three subclusters in the same cluster. +# This works because handle_dependencies() checks whether the requests +# allocate the same cluster, even if the COW regions don't overlap (in +# this case they don't). 
+cat <<EOF +open -o driver=$IMGFMT blkdebug::$TEST_IMG +break write_aio A +aio_write -P 10 30k 2k +wait_break A +aio_write -P 11 20k 2k +aio_write -P 12 40k 2k +resume A +aio_flush +EOF +} + +_concurrent_verify() +{ +cat <<EOF +open -o driver=$IMGFMT $TEST_IMG +read -q -P 10 30k 2k +read -q -P 11 20k 2k +read -q -P 12 40k 2k +EOF +} + +_make_test_img -o extended_l2=on 1M +_concurrent_io | $QEMU_IO | _filter_qemu_io +_concurrent_verify | $QEMU_IO | _filter_qemu_io + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/271.out b/tests/qemu-iotests/271.out new file mode 100644 index 0000000000..92deb7ebb0 --- /dev/null +++ b/tests/qemu-iotests/271.out @@ -0,0 +1,726 @@ +QA output created by 271 + +### Standard write tests (backing file: yes) ### + +Formatting 'TEST_DIR/t.IMGFMT.raw', fmt=raw size=1048576 +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=raw size=1048576 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=raw +write -q -P PATTERN 0 1k +L2 entry #0: 0x8000000000050000 0000000000000001 +write -q -P PATTERN 3k 512 +L2 entry #0: 0x8000000000050000 0000000000000003 +write -q -P PATTERN 5k 1k +L2 entry #0: 0x8000000000050000 0000000000000007 +write -q -P PATTERN 6k 2k +L2 entry #0: 0x8000000000050000 000000000000000f +write -q -P PATTERN 8k 6k +L2 entry #0: 0x8000000000050000 000000000000007f +write -q -P PATTERN 15k 4k +L2 entry #0: 0x8000000000050000 00000000000003ff +write -q -P PATTERN 32k 1k +L2 entry #0: 0x8000000000050000 00000000000103ff +write -q -P PATTERN 63k 4k +L2 entry #0: 0x8000000000050000 00000000800103ff +L2 entry #1: 0x8000000000060000 0000000000000003 +write -q -z 2k 2k +L2 entry #0: 0x8000000000050000 00000002800103fd +write -q -z 0 64k +L2 entry #0: 0x8000000000050000 ffffffff00000000 +write -q -P PATTERN 0 64k +L2 entry #0: 0x8000000000050000 00000000ffffffff +write -q -z -u 0 32k +L2 entry #0: 0x8000000000050000 0000ffffffff0000 +write -q -z -u 0 
64k +L2 entry #0: 0x0000000000000000 ffffffff00000000 +write -q -P PATTERN 3k 512 +L2 entry #0: 0x8000000000050000 fffffffd00000002 +write -q -P PATTERN 0 64k +L2 entry #0: 0x8000000000050000 00000000ffffffff +discard -q 0 64k +L2 entry #0: 0x0000000000000000 ffffffff00000000 +write -q -c -P PATTERN 0 64k +L2 entry #0: 0x4000000000050000 0000000000000000 +write -q -P PATTERN 3k 512 +L2 entry #0: 0x8000000000070000 00000000ffffffff + +### Standard write tests (backing file: no) ### + +Formatting 'TEST_DIR/t.IMGFMT.raw', fmt=raw size=1048576 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 +write -q -P PATTERN 0 1k +L2 entry #0: 0x8000000000050000 0000000000000001 +write -q -P PATTERN 3k 512 +L2 entry #0: 0x8000000000050000 0000000000000003 +write -q -P PATTERN 5k 1k +L2 entry #0: 0x8000000000050000 0000000000000007 +write -q -P PATTERN 6k 2k +L2 entry #0: 0x8000000000050000 000000000000000f +write -q -P PATTERN 8k 6k +L2 entry #0: 0x8000000000050000 000000000000007f +write -q -P PATTERN 15k 4k +L2 entry #0: 0x8000000000050000 00000000000003ff +write -q -P PATTERN 32k 1k +L2 entry #0: 0x8000000000050000 00000000000103ff +write -q -P PATTERN 63k 4k +L2 entry #0: 0x8000000000050000 00000000800103ff +L2 entry #1: 0x8000000000060000 0000000000000003 +write -q -z 2k 2k +L2 entry #0: 0x8000000000050000 00000002800103fd +write -q -z 0 64k +L2 entry #0: 0x8000000000050000 ffffffff00000000 +write -q -P PATTERN 0 64k +L2 entry #0: 0x8000000000050000 00000000ffffffff +write -q -z -u 0 32k +L2 entry #0: 0x8000000000050000 0000ffffffff0000 +write -q -z -u 0 64k +L2 entry #0: 0x0000000000000000 ffffffff00000000 +write -q -P PATTERN 3k 512 +L2 entry #0: 0x8000000000050000 fffffffd00000002 +write -q -P PATTERN 0 64k +L2 entry #0: 0x8000000000050000 00000000ffffffff +discard -q 0 64k +L2 entry #0: 0x0000000000000000 ffffffff00000000 +write -q -c -P PATTERN 0 64k +L2 entry #0: 0x4000000000050000 0000000000000000 +write -q -P PATTERN 3k 512 +L2 entry #0: 0x8000000000070000 
00000000ffffffff + +### Overwriting several clusters without COW ### + +Formatting 'TEST_DIR/t.IMGFMT.raw', fmt=raw size=1048576 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 +write -q -P PATTERN 24k 40k +L2 entry #0: 0x8000000000050000 00000000fffff000 +write -q -P PATTERN 90k 2k +L2 entry #1: 0x8000000000060000 0000000000002000 +write -q -P PATTERN 156k 2k +L2 entry #2: 0x8000000000070000 0000000000004000 +write -q -z 156k 2k +L2 entry #2: 0x8000000000070000 0000400000000000 +write -q -P PATTERN 192k 34k +L2 entry #3: 0x8000000000080000 000000000001ffff +write -q -P PATTERN 24k 192k +L2 entry #0: 0x8000000000050000 00000000fffff000 +L2 entry #1: 0x8000000000060000 00000000ffffffff +L2 entry #2: 0x8000000000070000 00000000ffffffff +L2 entry #3: 0x8000000000080000 000000000001ffff + +### Writing zeroes 1: unallocated clusters (backing file: yes) ### + +Formatting 'TEST_DIR/t.IMGFMT.raw', fmt=raw size=2132992 +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=raw size=2132992 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2132992 backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=raw +write -q -z 0 192k +L2 entry #0: 0x0000000000000000 ffffffff00000000 +L2 entry #1: 0x0000000000000000 ffffffff00000000 +L2 entry #2: 0x0000000000000000 ffffffff00000000 +write -q -z 224k 128k +L2 entry #3: 0x0000000000000000 ffff000000000000 +L2 entry #4: 0x0000000000000000 ffffffff00000000 +L2 entry #5: 0x0000000000000000 0000ffff00000000 +write -q -z 415k 128k +L2 entry #6: 0x8000000000050000 ffff000000008000 +L2 entry #7: 0x0000000000000000 ffffffff00000000 +L2 entry #8: 0x8000000000060000 00007fff00008000 + +### Writing zeroes 2: allocated clusters (backing file: yes) ### + +write -q -P PATTERN 576k 576k +L2 entry #9: 0x8000000000070000 00000000ffffffff +L2 entry #10: 0x8000000000080000 00000000ffffffff +L2 entry #11: 0x8000000000090000 00000000ffffffff +L2 entry #12: 0x80000000000a0000 00000000ffffffff +L2 entry #13: 0x80000000000b0000 00000000ffffffff +L2 entry #14: 
0x80000000000c0000 00000000ffffffff +L2 entry #15: 0x80000000000d0000 00000000ffffffff +L2 entry #16: 0x80000000000e0000 00000000ffffffff +L2 entry #17: 0x80000000000f0000 00000000ffffffff +write -q -z 576k 192k +L2 entry #9: 0x8000000000070000 ffffffff00000000 +L2 entry #10: 0x8000000000080000 ffffffff00000000 +L2 entry #11: 0x8000000000090000 ffffffff00000000 +write -q -z 800k 128k +L2 entry #12: 0x80000000000a0000 ffff00000000ffff +L2 entry #13: 0x80000000000b0000 ffffffff00000000 +L2 entry #14: 0x80000000000c0000 0000ffffffff0000 +write -q -z 991k 128k +L2 entry #15: 0x80000000000d0000 ffff00000000ffff +L2 entry #16: 0x80000000000e0000 ffffffff00000000 +L2 entry #17: 0x80000000000f0000 00007fffffff8000 + +### Writing zeroes 3: compressed clusters (backing file: yes) ### + +write -q -c -P PATTERN 1152k 64k +L2 entry #18: 0x4000000000100000 0000000000000000 +write -q -c -P PATTERN 1216k 64k +L2 entry #19: 0x4000000000110000 0000000000000000 +write -q -c -P PATTERN 1280k 64k +L2 entry #20: 0x4000000000120000 0000000000000000 +write -q -c -P PATTERN 1344k 64k +L2 entry #21: 0x4000000000130000 0000000000000000 +write -q -c -P PATTERN 1408k 64k +L2 entry #22: 0x4000000000140000 0000000000000000 +write -q -c -P PATTERN 1472k 64k +L2 entry #23: 0x4000000000150000 0000000000000000 +write -q -c -P PATTERN 1536k 64k +L2 entry #24: 0x4000000000160000 0000000000000000 +write -q -c -P PATTERN 1600k 64k +L2 entry #25: 0x4000000000170000 0000000000000000 +write -q -c -P PATTERN 1664k 64k +L2 entry #26: 0x4000000000180000 0000000000000000 +write -q -c -P PATTERN 1728k 64k +L2 entry #27: 0x4000000000190000 0000000000000000 +write -q -c -P PATTERN 1792k 64k +L2 entry #28: 0x40000000001a0000 0000000000000000 +write -q -z 1152k 192k +L2 entry #18: 0x0000000000000000 ffffffff00000000 +L2 entry #19: 0x0000000000000000 ffffffff00000000 +L2 entry #20: 0x0000000000000000 ffffffff00000000 +write -q -z 1376k 128k +L2 entry #21: 0x8000000000100000 00000000ffffffff +L2 entry #22: 
0x8000000000110000 00000000ffffffff +L2 entry #23: 0x8000000000120000 00000000ffffffff +write -q -z 1567k 129k +L2 entry #24: 0x8000000000130000 00000000ffffffff +L2 entry #25: 0x8000000000140000 00000000ffffffff +L2 entry #26: 0x8000000000150000 00000000ffffffff +write -q -z 1759k 128k +L2 entry #27: 0x8000000000160000 ffff00000000ffff +L2 entry #28: 0x0000000000000000 ffffffff00000000 +L2 entry #29: 0x8000000000170000 00007fff00008000 + +### Writing zeroes 4: other tests (backing file: yes) ### + +write -q -z 1951k 8k +L2 entry #30: 0x8000000000180000 0007000000088000 +write -q -z 2048k 35k +L2 entry #32: 0x0000000000000000 0003ffff00000000 + +### Writing zeroes 1: unallocated clusters (backing file: no) ### + +Formatting 'TEST_DIR/t.IMGFMT.raw', fmt=raw size=2132992 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2132992 +write -q -z 0 192k +L2 entry #0: 0x0000000000000000 ffffffff00000000 +L2 entry #1: 0x0000000000000000 ffffffff00000000 +L2 entry #2: 0x0000000000000000 ffffffff00000000 +write -q -z 224k 128k +L2 entry #3: 0x0000000000000000 ffff000000000000 +L2 entry #4: 0x0000000000000000 ffffffff00000000 +L2 entry #5: 0x0000000000000000 0000ffff00000000 +write -q -z 415k 128k +L2 entry #6: 0x0000000000000000 ffff800000000000 +L2 entry #7: 0x0000000000000000 ffffffff00000000 +L2 entry #8: 0x0000000000000000 0000ffff00000000 + +### Writing zeroes 2: allocated clusters (backing file: no) ### + +write -q -P PATTERN 576k 576k +L2 entry #9: 0x8000000000050000 00000000ffffffff +L2 entry #10: 0x8000000000060000 00000000ffffffff +L2 entry #11: 0x8000000000070000 00000000ffffffff +L2 entry #12: 0x8000000000080000 00000000ffffffff +L2 entry #13: 0x8000000000090000 00000000ffffffff +L2 entry #14: 0x80000000000a0000 00000000ffffffff +L2 entry #15: 0x80000000000b0000 00000000ffffffff +L2 entry #16: 0x80000000000c0000 00000000ffffffff +L2 entry #17: 0x80000000000d0000 00000000ffffffff +write -q -z 576k 192k +L2 entry #9: 0x8000000000050000 ffffffff00000000 +L2 entry #10: 
0x8000000000060000 ffffffff00000000 +L2 entry #11: 0x8000000000070000 ffffffff00000000 +write -q -z 800k 128k +L2 entry #12: 0x8000000000080000 ffff00000000ffff +L2 entry #13: 0x8000000000090000 ffffffff00000000 +L2 entry #14: 0x80000000000a0000 0000ffffffff0000 +write -q -z 991k 128k +L2 entry #15: 0x80000000000b0000 ffff00000000ffff +L2 entry #16: 0x80000000000c0000 ffffffff00000000 +L2 entry #17: 0x80000000000d0000 00007fffffff8000 + +### Writing zeroes 3: compressed clusters (backing file: no) ### + +write -q -c -P PATTERN 1152k 64k +L2 entry #18: 0x40000000000e0000 0000000000000000 +write -q -c -P PATTERN 1216k 64k +L2 entry #19: 0x40000000000f0000 0000000000000000 +write -q -c -P PATTERN 1280k 64k +L2 entry #20: 0x4000000000100000 0000000000000000 +write -q -c -P PATTERN 1344k 64k +L2 entry #21: 0x4000000000110000 0000000000000000 +write -q -c -P PATTERN 1408k 64k +L2 entry #22: 0x4000000000120000 0000000000000000 +write -q -c -P PATTERN 1472k 64k +L2 entry #23: 0x4000000000130000 0000000000000000 +write -q -c -P PATTERN 1536k 64k +L2 entry #24: 0x4000000000140000 0000000000000000 +write -q -c -P PATTERN 1600k 64k +L2 entry #25: 0x4000000000150000 0000000000000000 +write -q -c -P PATTERN 1664k 64k +L2 entry #26: 0x4000000000160000 0000000000000000 +write -q -c -P PATTERN 1728k 64k +L2 entry #27: 0x4000000000170000 0000000000000000 +write -q -c -P PATTERN 1792k 64k +L2 entry #28: 0x4000000000180000 0000000000000000 +write -q -z 1152k 192k +L2 entry #18: 0x0000000000000000 ffffffff00000000 +L2 entry #19: 0x0000000000000000 ffffffff00000000 +L2 entry #20: 0x0000000000000000 ffffffff00000000 +write -q -z 1376k 128k +L2 entry #21: 0x80000000000e0000 00000000ffffffff +L2 entry #22: 0x80000000000f0000 00000000ffffffff +L2 entry #23: 0x8000000000100000 00000000ffffffff +write -q -z 1567k 129k +L2 entry #24: 0x8000000000110000 00000000ffffffff +L2 entry #25: 0x8000000000120000 00000000ffffffff +L2 entry #26: 0x8000000000130000 00000000ffffffff +write -q -z 1759k 128k 
+L2 entry #27: 0x8000000000140000 ffff00000000ffff +L2 entry #28: 0x0000000000000000 ffffffff00000000 +L2 entry #29: 0x0000000000000000 0000ffff00000000 + +### Writing zeroes 4: other tests (backing file: no) ### + +write -q -z 1951k 8k +L2 entry #30: 0x0000000000000000 000f800000000000 +write -q -z 2048k 35k +L2 entry #32: 0x0000000000000000 0003ffff00000000 + +### Zero + unmap 1: allocated clusters (backing file: yes) ### + +Formatting 'TEST_DIR/t.IMGFMT.raw', fmt=raw size=2132992 +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=raw size=2132992 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2132992 backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=raw +write -q -P PATTERN 576k 576k +L2 entry #9: 0x8000000000050000 00000000ffffffff +L2 entry #10: 0x8000000000060000 00000000ffffffff +L2 entry #11: 0x8000000000070000 00000000ffffffff +L2 entry #12: 0x8000000000080000 00000000ffffffff +L2 entry #13: 0x8000000000090000 00000000ffffffff +L2 entry #14: 0x80000000000a0000 00000000ffffffff +L2 entry #15: 0x80000000000b0000 00000000ffffffff +L2 entry #16: 0x80000000000c0000 00000000ffffffff +L2 entry #17: 0x80000000000d0000 00000000ffffffff +write -q -z -u 576k 192k +L2 entry #9: 0x0000000000000000 ffffffff00000000 +L2 entry #10: 0x0000000000000000 ffffffff00000000 +L2 entry #11: 0x0000000000000000 ffffffff00000000 +write -q -z -u 800k 128k +L2 entry #12: 0x8000000000080000 ffff00000000ffff +L2 entry #13: 0x0000000000000000 ffffffff00000000 +L2 entry #14: 0x80000000000a0000 0000ffffffff0000 +write -q -z -u 991k 128k +L2 entry #15: 0x80000000000b0000 ffff00000000ffff +L2 entry #16: 0x0000000000000000 ffffffff00000000 +L2 entry #17: 0x80000000000d0000 00007fffffff8000 + +### Zero + unmap 2: compressed clusters (backing file: yes) ### + +write -q -c -P PATTERN 1152k 64k +L2 entry #18: 0x4000000000050000 0000000000000000 +write -q -c -P PATTERN 1216k 64k +L2 entry #19: 0x4000000000060000 0000000000000000 +write -q -c -P PATTERN 1280k 64k +L2 entry #20: 0x4000000000070000 
0000000000000000 +write -q -c -P PATTERN 1344k 64k +L2 entry #21: 0x4000000000090000 0000000000000000 +write -q -c -P PATTERN 1408k 64k +L2 entry #22: 0x40000000000c0000 0000000000000000 +write -q -c -P PATTERN 1472k 64k +L2 entry #23: 0x40000000000e0000 0000000000000000 +write -q -c -P PATTERN 1536k 64k +L2 entry #24: 0x40000000000f0000 0000000000000000 +write -q -c -P PATTERN 1600k 64k +L2 entry #25: 0x4000000000100000 0000000000000000 +write -q -c -P PATTERN 1664k 64k +L2 entry #26: 0x4000000000110000 0000000000000000 +write -q -c -P PATTERN 1728k 64k +L2 entry #27: 0x4000000000120000 0000000000000000 +write -q -c -P PATTERN 1792k 64k +L2 entry #28: 0x4000000000130000 0000000000000000 +write -q -z -u 1152k 192k +L2 entry #18: 0x0000000000000000 ffffffff00000000 +L2 entry #19: 0x0000000000000000 ffffffff00000000 +L2 entry #20: 0x0000000000000000 ffffffff00000000 +write -q -z -u 1376k 128k +L2 entry #21: 0x8000000000050000 00000000ffffffff +L2 entry #22: 0x8000000000060000 00000000ffffffff +L2 entry #23: 0x8000000000070000 00000000ffffffff +write -q -z -u 1567k 129k +L2 entry #24: 0x8000000000090000 00000000ffffffff +L2 entry #25: 0x80000000000e0000 00000000ffffffff +L2 entry #26: 0x80000000000f0000 00000000ffffffff +write -q -z -u 1759k 128k +L2 entry #27: 0x80000000000c0000 ffff00000000ffff +L2 entry #28: 0x0000000000000000 ffffffff00000000 +L2 entry #29: 0x8000000000100000 00007fff00008000 + +### Zero + unmap 1: allocated clusters (backing file: no) ### + +Formatting 'TEST_DIR/t.IMGFMT.raw', fmt=raw size=2132992 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2132992 +write -q -P PATTERN 576k 576k +L2 entry #9: 0x8000000000050000 00000000ffffffff +L2 entry #10: 0x8000000000060000 00000000ffffffff +L2 entry #11: 0x8000000000070000 00000000ffffffff +L2 entry #12: 0x8000000000080000 00000000ffffffff +L2 entry #13: 0x8000000000090000 00000000ffffffff +L2 entry #14: 0x80000000000a0000 00000000ffffffff +L2 entry #15: 0x80000000000b0000 00000000ffffffff +L2 entry 
#16: 0x80000000000c0000 00000000ffffffff +L2 entry #17: 0x80000000000d0000 00000000ffffffff +write -q -z -u 576k 192k +L2 entry #9: 0x0000000000000000 ffffffff00000000 +L2 entry #10: 0x0000000000000000 ffffffff00000000 +L2 entry #11: 0x0000000000000000 ffffffff00000000 +write -q -z -u 800k 128k +L2 entry #12: 0x8000000000080000 ffff00000000ffff +L2 entry #13: 0x0000000000000000 ffffffff00000000 +L2 entry #14: 0x80000000000a0000 0000ffffffff0000 +write -q -z -u 991k 128k +L2 entry #15: 0x80000000000b0000 ffff00000000ffff +L2 entry #16: 0x0000000000000000 ffffffff00000000 +L2 entry #17: 0x80000000000d0000 00007fffffff8000 + +### Zero + unmap 2: compressed clusters (backing file: no) ### + +write -q -c -P PATTERN 1152k 64k +L2 entry #18: 0x4000000000050000 0000000000000000 +write -q -c -P PATTERN 1216k 64k +L2 entry #19: 0x4000000000060000 0000000000000000 +write -q -c -P PATTERN 1280k 64k +L2 entry #20: 0x4000000000070000 0000000000000000 +write -q -c -P PATTERN 1344k 64k +L2 entry #21: 0x4000000000090000 0000000000000000 +write -q -c -P PATTERN 1408k 64k +L2 entry #22: 0x40000000000c0000 0000000000000000 +write -q -c -P PATTERN 1472k 64k +L2 entry #23: 0x40000000000e0000 0000000000000000 +write -q -c -P PATTERN 1536k 64k +L2 entry #24: 0x40000000000f0000 0000000000000000 +write -q -c -P PATTERN 1600k 64k +L2 entry #25: 0x4000000000100000 0000000000000000 +write -q -c -P PATTERN 1664k 64k +L2 entry #26: 0x4000000000110000 0000000000000000 +write -q -c -P PATTERN 1728k 64k +L2 entry #27: 0x4000000000120000 0000000000000000 +write -q -c -P PATTERN 1792k 64k +L2 entry #28: 0x4000000000130000 0000000000000000 +write -q -z -u 1152k 192k +L2 entry #18: 0x0000000000000000 ffffffff00000000 +L2 entry #19: 0x0000000000000000 ffffffff00000000 +L2 entry #20: 0x0000000000000000 ffffffff00000000 +write -q -z -u 1376k 128k +L2 entry #21: 0x8000000000050000 00000000ffffffff +L2 entry #22: 0x8000000000060000 00000000ffffffff +L2 entry #23: 0x8000000000070000 00000000ffffffff +write 
-q -z -u 1567k 129k +L2 entry #24: 0x8000000000090000 00000000ffffffff +L2 entry #25: 0x80000000000e0000 00000000ffffffff +L2 entry #26: 0x80000000000f0000 00000000ffffffff +write -q -z -u 1759k 128k +L2 entry #27: 0x80000000000c0000 ffff00000000ffff +L2 entry #28: 0x0000000000000000 ffffffff00000000 +L2 entry #29: 0x0000000000000000 0000ffff00000000 + +### Discarding clusters with non-zero bitmaps (backing file: yes) ### + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=raw +L2 entry #0: 0x0000000000000000 ffffffff00000000 +L2 entry #1: 0x0000000000000000 ffffffff00000000 +Image resized. +Image resized. +L2 entry #0: 0x0000000000000000 ffffffff00000000 +L2 entry #1: 0x0000000000000000 ffffffff00000000 + +### Discarding clusters with non-zero bitmaps (backing file: no) ### + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 +L2 entry #0: 0x0000000000000000 ffffffff00000000 +L2 entry #1: 0x0000000000000000 ffffffff00000000 +Image resized. +Image resized. 
+L2 entry #0: 0x0000000000000000 0000ffff00000000 +L2 entry #1: 0x0000000000000000 0000000000000000 + +### Corrupted L2 entries - read test (allocated) ### + +# 'cluster is zero' bit set on the standard cluster descriptor + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 +L2 entry #0: 0x8000000000050001 0000000000000001 +L2 entry #0: 0x8000000000050001 0000000000000001 + +# Both 'subcluster is zero' and 'subcluster is allocated' bits set + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 +L2 entry #1: 0x8000000000060000 00000001ffffffff +qcow2: Marking image as corrupt: Invalid cluster entry found (L2 offset: 0x40000, L2 index: 0x1); further corruption events will be suppressed +read failed: Input/output error + +### Corrupted L2 entries - read test (unallocated) ### + +# 'cluster is zero' bit set on the standard cluster descriptor + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 +L2 entry #0: 0x0000000000000001 0000000000000000 +L2 entry #0: 0x0000000000000001 0000000000000000 + +# 'subcluster is allocated' bit set + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 +L2 entry #0: 0x0000000000000000 0000000000000001 +qcow2: Marking image as corrupt: Invalid cluster entry found (L2 offset: 0x40000, L2 index: 0); further corruption events will be suppressed +read failed: Input/output error + +# Both 'subcluster is zero' and 'subcluster is allocated' bits set + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 +L2 entry #1: 0x0000000000000000 0000000100000001 +qcow2: Marking image as corrupt: Invalid cluster entry found (L2 offset: 0x40000, L2 index: 0x1); further corruption events will be suppressed +read failed: Input/output error + +### Compressed cluster with subcluster bitmap != 0 - read test ### + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 +L2 entry #0: 0x4000000000050000 0000000180000000 +read 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +L2 entry #0: 
0x4000000000050000 0000000180000000 + +### Corrupted L2 entries - write test (allocated) ### + +# 'cluster is zero' bit set on the standard cluster descriptor + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 +L2 entry #0: 0x8000000000050001 0000000000000001 +L2 entry #0: 0x8000000000050001 0000000000000001 + +# Both 'subcluster is zero' and 'subcluster is allocated' bits set + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 +L2 entry #1: 0x8000000000060000 00000001ffffffff +qcow2: Marking image as corrupt: Invalid cluster entry found (L2 offset: 0x40000, L2 index: 0x1); further corruption events will be suppressed +write failed: Input/output error + +### Corrupted L2 entries - write test (unallocated) ### + +# 'cluster is zero' bit set on the standard cluster descriptor + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 +L2 entry #0: 0x0000000000000001 0000000000000000 +L2 entry #0: 0x8000000000060000 0000000000000001 + +# 'subcluster is allocated' bit set + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 +L2 entry #0: 0x0000000000000000 0000000000000001 +qcow2: Marking image as corrupt: Invalid cluster entry found (L2 offset: 0x40000, L2 index: 0); further corruption events will be suppressed +write failed: Input/output error + +# Both 'subcluster is zero' and 'subcluster is allocated' bits set + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 +L2 entry #1: 0x0000000000000000 0000000100000001 +qcow2: Marking image as corrupt: Invalid cluster entry found (L2 offset: 0x40000, L2 index: 0x1); further corruption events will be suppressed +write failed: Input/output error + +### Compressed cluster with subcluster bitmap != 0 - write test ### + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 +L2 entry #0: 0x4000000000050000 0000000180000000 +wrote 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +L2 entry #0: 0x8000000000060000 00000000ffffffff + +### Detect and repair unaligned 
clusters ### + +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=raw size=131072 +# Corrupted L2 entry, allocated subcluster # +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=131072 backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=raw +ERROR offset=50200: Data cluster is not properly aligned; L2 entry corrupted. +ERROR cluster 6 refcount=0 reference=1 +Rebuilding refcount structure +ERROR offset=50200: Data cluster is not properly aligned; L2 entry corrupted. +Repairing cluster 1 refcount=1 reference=0 +Repairing cluster 2 refcount=1 reference=0 +ERROR offset=50200: Data cluster is not properly aligned; L2 entry corrupted. +The following inconsistencies were found and repaired: + + 0 leaked clusters + 1 corruptions + +Double checking the fixed image now... + +1 errors were found on the image. +Data may be corrupted, or further writes to the image may corrupt it. +qcow2: Marking image as corrupt: Cluster allocation offset 0x50200 unaligned (L2 offset: 0x40000, L2 index: 0); further corruption events will be suppressed +read failed: Input/output error +# Corrupted L2 entry, no allocated subclusters # +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=131072 backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=raw +Repairing offset=50200: Preallocated cluster is not properly aligned; L2 entry corrupted. +Leaked cluster 5 refcount=1 reference=0 +Repairing cluster 5 refcount=1 reference=0 +The following inconsistencies were found and repaired: + + 1 leaked clusters + 1 corruptions + +Double checking the fixed image now... +No errors were found on the image. 
+ +### Image creation options ### + +# cluster_size < 16k +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 +qemu-img: TEST_DIR/t.IMGFMT: Extended L2 entries are only supported with cluster sizes of at least 16384 bytes +# backing file and preallocation=metadata +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=raw size=1048576 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=524288 backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=raw preallocation=metadata +Image resized. +read 524288/524288 bytes at offset 0 +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 524288/524288 bytes at offset 524288 +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Offset Length Mapped to File +0 0x80000 0 TEST_DIR/t.qcow2.base +# backing file and preallocation=falloc +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=524288 backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=raw preallocation=falloc +Image resized. +read 524288/524288 bytes at offset 0 +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 524288/524288 bytes at offset 524288 +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Offset Length Mapped to File +0 0x80000 0 TEST_DIR/t.qcow2.base +# backing file and preallocation=full +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=524288 backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=raw preallocation=full +Image resized. +read 524288/524288 bytes at offset 0 +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 524288/524288 bytes at offset 524288 +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Offset Length Mapped to File +0 0x80000 0 TEST_DIR/t.qcow2.base + +### Image resizing with preallocation and backing files ### + +# resize --preallocation=metadata +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=515072 backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=raw +Image resized. 
+read 515072/515072 bytes at offset 0 +503 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 522240/522240 bytes at offset 515072 +510 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +# resize --preallocation=falloc +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=515072 backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=raw +Image resized. +read 515072/515072 bytes at offset 0 +503 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 522240/522240 bytes at offset 515072 +510 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +# resize --preallocation=full +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=515072 backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=raw +Image resized. +read 515072/515072 bytes at offset 0 +503 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 522240/522240 bytes at offset 515072 +510 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +### Image resizing with preallocation without backing files ### + +# resize --preallocation=metadata +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=515072 +wrote 515072/515072 bytes at offset 0 +503 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Image resized. +read 515072/515072 bytes at offset 0 +503 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 522240/522240 bytes at offset 515072 +510 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +# resize --preallocation=falloc +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=515072 +wrote 515072/515072 bytes at offset 0 +503 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Image resized. +read 515072/515072 bytes at offset 0 +503 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 522240/522240 bytes at offset 515072 +510 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +# resize --preallocation=full +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=515072 +wrote 515072/515072 bytes at offset 0 +503 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Image resized. 
+read 515072/515072 bytes at offset 0 +503 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 522240/522240 bytes at offset 515072 +510 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +### qemu-img measure ### + +# 512MB, extended_l2=off +required size: 327680 +fully allocated size: 537198592 +# 512MB, extended_l2=on +required size: 393216 +fully allocated size: 537264128 +# 16K clusters, 64GB, extended_l2=off +required size: 42008576 +fully allocated size: 68761485312 +# 16K clusters, 64GB, extended_l2=on +required size: 75579392 +fully allocated size: 68795056128 +# 8k clusters +qemu-img: Extended L2 entries are only supported with cluster sizes of at least 16384 bytes +# 1024 TB +required size: 309285027840 +fully allocated size: 1126209191870464 +# 1025 TB +qemu-img: The image size is too large (try using a larger cluster size) + +### qemu-img amend ### + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 +qemu-img: Invalid parameter 'extended_l2' +This option is only supported for image creation +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 +qemu-img: Invalid parameter 'extended_l2' +This option is only supported for image creation + +### Test copy-on-write on an image with snapshots ### + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 +L2 entry #0: 0x8000000000050000 0000008000042000 +L2 entry #1: 0x8000000000060000 0000008000042000 +L2 entry #2: 0x8000000000070000 0000008000042000 +L2 entry #3: 0x8000000000080000 0000008000042000 +L2 entry #4: 0x8000000000090000 0000008000042000 +L2 entry #5: 0x80000000000a0000 0000008000042000 +L2 entry #6: 0x80000000000b0000 0000008000042000 +L2 entry #7: 0x80000000000c0000 0000008000042000 +L2 entry #8: 0x80000000000d0000 0000008000042000 +L2 entry #9: 0x80000000000e0000 0000008000042000 +L2 entry #0: 0x8000000000120000 000000800007e000 +L2 entry #1: 0x8000000000130000 000000800007fc00 +L2 entry #2: 0x8000000000140000 00000080001fe000 +L2 entry #3: 0x8000000000150000 
000000800007e000 +L2 entry #4: 0x8000000000160000 000000000007ff80 +L2 entry #5: 0x8000000000170000 000000000007ffff +L2 entry #6: 0x00000000000b0000 0001808000042000 +L2 entry #7: 0x00000000000c0000 0000208000040000 +L2 entry #8: 0x8000000000180000 000000800007e000 +L2 entry #9: 0x00000000000e0000 000000c000042000 + +### Test concurrent requests ### + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 +blkdebug: Suspended request 'A' +blkdebug: Resuming request 'A' +wrote 2048/2048 bytes at offset 30720 +2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 2048/2048 bytes at offset 20480 +2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 2048/2048 bytes at offset 40960 +2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +*** done diff --git a/tests/qemu-iotests/274.out b/tests/qemu-iotests/274.out index 3a36fe7dfd..bf5abd4c10 100644 --- a/tests/qemu-iotests/274.out +++ b/tests/qemu-iotests/274.out @@ -1,9 +1,9 @@ == Commit tests == -Formatting 'TEST_DIR/PID-base', fmt=qcow2 cluster_size=65536 compression_type=zlib size=2097152 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/PID-base', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=2097152 lazy_refcounts=off refcount_bits=16 -Formatting 'TEST_DIR/PID-mid', fmt=qcow2 cluster_size=65536 compression_type=zlib size=1048576 backing_file=TEST_DIR/PID-base backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/PID-mid', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1048576 backing_file=TEST_DIR/PID-base backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 -Formatting 'TEST_DIR/PID-top', fmt=qcow2 cluster_size=65536 compression_type=zlib size=2097152 backing_file=TEST_DIR/PID-mid backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/PID-top', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=2097152 backing_file=TEST_DIR/PID-mid backing_fmt=qcow2 lazy_refcounts=off 
refcount_bits=16 wrote 2097152/2097152 bytes at offset 0 2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) @@ -57,6 +57,7 @@ Format specific information: lazy refcounts: false refcount bits: 16 corrupt: false + extended l2: false read 1048576/1048576 bytes at offset 0 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) @@ -65,11 +66,11 @@ read 1048576/1048576 bytes at offset 1048576 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) === Testing HMP commit (top -> mid) === -Formatting 'TEST_DIR/PID-base', fmt=qcow2 cluster_size=65536 compression_type=zlib size=2097152 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/PID-base', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=2097152 lazy_refcounts=off refcount_bits=16 -Formatting 'TEST_DIR/PID-mid', fmt=qcow2 cluster_size=65536 compression_type=zlib size=1048576 backing_file=TEST_DIR/PID-base backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/PID-mid', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1048576 backing_file=TEST_DIR/PID-base backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 -Formatting 'TEST_DIR/PID-top', fmt=qcow2 cluster_size=65536 compression_type=zlib size=2097152 backing_file=TEST_DIR/PID-mid backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/PID-top', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=2097152 backing_file=TEST_DIR/PID-mid backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 wrote 2097152/2097152 bytes at offset 0 2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) @@ -88,6 +89,7 @@ Format specific information: lazy refcounts: false refcount bits: 16 corrupt: false + extended l2: false read 1048576/1048576 bytes at offset 0 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) @@ -96,11 +98,11 @@ read 1048576/1048576 bytes at offset 1048576 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) === Testing QMP active commit (top -> mid) 
=== -Formatting 'TEST_DIR/PID-base', fmt=qcow2 cluster_size=65536 compression_type=zlib size=2097152 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/PID-base', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=2097152 lazy_refcounts=off refcount_bits=16 -Formatting 'TEST_DIR/PID-mid', fmt=qcow2 cluster_size=65536 compression_type=zlib size=1048576 backing_file=TEST_DIR/PID-base backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/PID-mid', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1048576 backing_file=TEST_DIR/PID-base backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 -Formatting 'TEST_DIR/PID-top', fmt=qcow2 cluster_size=65536 compression_type=zlib size=2097152 backing_file=TEST_DIR/PID-mid backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/PID-top', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=2097152 backing_file=TEST_DIR/PID-mid backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 wrote 2097152/2097152 bytes at offset 0 2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) @@ -125,6 +127,7 @@ Format specific information: lazy refcounts: false refcount bits: 16 corrupt: false + extended l2: false read 1048576/1048576 bytes at offset 0 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) @@ -134,9 +137,9 @@ read 1048576/1048576 bytes at offset 1048576 == Resize tests == === preallocation=off === -Formatting 'TEST_DIR/PID-base', fmt=qcow2 cluster_size=65536 compression_type=zlib size=6442450944 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/PID-base', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=6442450944 lazy_refcounts=off refcount_bits=16 -Formatting 'TEST_DIR/PID-top', fmt=qcow2 cluster_size=65536 compression_type=zlib size=1073741824 backing_file=TEST_DIR/PID-base backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/PID-top', fmt=qcow2 cluster_size=65536 
extended_l2=off compression_type=zlib size=1073741824 backing_file=TEST_DIR/PID-base backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 wrote 65536/65536 bytes at offset 5368709120 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) @@ -153,9 +156,9 @@ read 65536/65536 bytes at offset 5368709120 { "start": 1073741824, "length": 7516192768, "depth": 0, "zero": true, "data": false}] === preallocation=metadata === -Formatting 'TEST_DIR/PID-base', fmt=qcow2 cluster_size=65536 compression_type=zlib size=34359738368 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/PID-base', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=34359738368 lazy_refcounts=off refcount_bits=16 -Formatting 'TEST_DIR/PID-top', fmt=qcow2 cluster_size=65536 compression_type=zlib size=32212254720 backing_file=TEST_DIR/PID-base backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/PID-top', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=32212254720 backing_file=TEST_DIR/PID-base backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 wrote 65536/65536 bytes at offset 33285996544 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) @@ -177,9 +180,9 @@ read 65536/65536 bytes at offset 33285996544 { "start": 34896609280, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 2685075456}] === preallocation=falloc === -Formatting 'TEST_DIR/PID-base', fmt=qcow2 cluster_size=65536 compression_type=zlib size=10485760 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/PID-base', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=10485760 lazy_refcounts=off refcount_bits=16 -Formatting 'TEST_DIR/PID-top', fmt=qcow2 cluster_size=65536 compression_type=zlib size=5242880 backing_file=TEST_DIR/PID-base backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/PID-top', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=5242880 
backing_file=TEST_DIR/PID-base backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 wrote 65536/65536 bytes at offset 9437184 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) @@ -196,9 +199,9 @@ read 65536/65536 bytes at offset 9437184 { "start": 5242880, "length": 10485760, "depth": 0, "zero": false, "data": true, "offset": 327680}] === preallocation=full === -Formatting 'TEST_DIR/PID-base', fmt=qcow2 cluster_size=65536 compression_type=zlib size=16777216 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/PID-base', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=16777216 lazy_refcounts=off refcount_bits=16 -Formatting 'TEST_DIR/PID-top', fmt=qcow2 cluster_size=65536 compression_type=zlib size=8388608 backing_file=TEST_DIR/PID-base backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/PID-top', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=8388608 backing_file=TEST_DIR/PID-base backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 wrote 65536/65536 bytes at offset 11534336 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) @@ -215,9 +218,9 @@ read 65536/65536 bytes at offset 11534336 { "start": 8388608, "length": 4194304, "depth": 0, "zero": false, "data": true, "offset": 327680}] === preallocation=off === -Formatting 'TEST_DIR/PID-base', fmt=qcow2 cluster_size=65536 compression_type=zlib size=393216 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/PID-base', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=393216 lazy_refcounts=off refcount_bits=16 -Formatting 'TEST_DIR/PID-top', fmt=qcow2 cluster_size=65536 compression_type=zlib size=259072 backing_file=TEST_DIR/PID-base backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/PID-top', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=259072 backing_file=TEST_DIR/PID-base backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 wrote 65536/65536 bytes 
at offset 259072 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) @@ -235,9 +238,9 @@ read 65536/65536 bytes at offset 259072 { "start": 262144, "length": 262144, "depth": 0, "zero": true, "data": false}] === preallocation=off === -Formatting 'TEST_DIR/PID-base', fmt=qcow2 cluster_size=65536 compression_type=zlib size=409600 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/PID-base', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=409600 lazy_refcounts=off refcount_bits=16 -Formatting 'TEST_DIR/PID-top', fmt=qcow2 cluster_size=65536 compression_type=zlib size=262144 backing_file=TEST_DIR/PID-base backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/PID-top', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=262144 backing_file=TEST_DIR/PID-base backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 wrote 65536/65536 bytes at offset 344064 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) @@ -254,9 +257,9 @@ read 65536/65536 bytes at offset 344064 { "start": 262144, "length": 262144, "depth": 0, "zero": true, "data": false}] === preallocation=off === -Formatting 'TEST_DIR/PID-base', fmt=qcow2 cluster_size=65536 compression_type=zlib size=524288 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/PID-base', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=524288 lazy_refcounts=off refcount_bits=16 -Formatting 'TEST_DIR/PID-top', fmt=qcow2 cluster_size=65536 compression_type=zlib size=262144 backing_file=TEST_DIR/PID-base backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/PID-top', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=262144 backing_file=TEST_DIR/PID-base backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16 wrote 65536/65536 bytes at offset 446464 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) diff --git a/tests/qemu-iotests/280.out b/tests/qemu-iotests/280.out index 
fc59b9bc5c..09a0f1a7cb 100644 --- a/tests/qemu-iotests/280.out +++ b/tests/qemu-iotests/280.out @@ -1,4 +1,4 @@ -Formatting 'TEST_DIR/PID-base', fmt=qcow2 cluster_size=65536 compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/PID-base', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 === Launch VM === Enabling migration QMP events on VM... diff --git a/tests/qemu-iotests/291.out b/tests/qemu-iotests/291.out index 9f661515b4..ee89a72885 100644 --- a/tests/qemu-iotests/291.out +++ b/tests/qemu-iotests/291.out @@ -41,6 +41,7 @@ Format specific information: granularity: 65536 refcount bits: 16 corrupt: false + extended l2: false image: TEST_DIR/t.IMGFMT file format: IMGFMT virtual size: 10 MiB (10485760 bytes) @@ -65,6 +66,7 @@ Format specific information: granularity: 65536 refcount bits: 16 corrupt: false + extended l2: false === Check bitmap contents === diff --git a/tests/qemu-iotests/302.out b/tests/qemu-iotests/302.out index e37d3a1030..e2f6077e83 100644 --- a/tests/qemu-iotests/302.out +++ b/tests/qemu-iotests/302.out @@ -17,6 +17,7 @@ Format specific information: lazy refcounts: false refcount bits: 16 corrupt: false + extended l2: false === Converted image check === No errors were found on the image. 
diff --git a/tests/qemu-iotests/303.out b/tests/qemu-iotests/303.out index 7fa1edef0d..7c16998587 100644 --- a/tests/qemu-iotests/303.out +++ b/tests/qemu-iotests/303.out @@ -47,7 +47,7 @@ header_length 112 Header extension: magic 0x6803f857 (Feature table) -length 336 +length 384 data <binary> Header extension: @@ -105,7 +105,7 @@ Bitmap table type size offset { "name": "Feature table", "magic": 1745090647, - "length": 336, + "length": 384, "data_str": "<binary>" }, { diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter index c6912be009..838ed15793 100644 --- a/tests/qemu-iotests/common.filter +++ b/tests/qemu-iotests/common.filter @@ -239,6 +239,7 @@ _filter_img_info() -e "/adapter_type: '[^']*'/d" \ -e "/hwversion: '[^']*'/d" \ -e "/lazy_refcounts: \\(on\\|off\\)/d" \ + -e "/extended_l2=\\(on\\|off\\)/d" \ -e "/block_size: [0-9]\\+/d" \ -e "/block_state_zero: \\(on\\|off\\)/d" \ -e "/log_size: [0-9]\\+/d" \ diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index a53ea7f78b..5cad015231 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -284,6 +284,7 @@ 267 rw auto quick snapshot 268 rw auto quick 270 rw backing quick +271 rw auto 272 rw 273 backing quick 274 rw backing diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c index d25ff35492..504b810af5 100644 --- a/tests/qtest/bios-tables-test.c +++ b/tests/qtest/bios-tables-test.c @@ -80,6 +80,8 @@ typedef struct { GArray *tables; uint32_t smbios_ep_addr; struct smbios_21_entry_point smbios_ep_table; + uint16_t smbios_cpu_max_speed; + uint16_t smbios_cpu_curr_speed; uint8_t *required_struct_types; int required_struct_types_len; QTestState *qts; @@ -563,6 +565,31 @@ static inline bool smbios_single_instance(uint8_t type) } } +static bool smbios_cpu_test(test_data *data, uint32_t addr) +{ + uint16_t expect_speed[2]; + uint16_t real; + int offset[2]; + int i; + + /* Check CPU speed for backward compatibility */ + offset[0] = 
offsetof(struct smbios_type_4, max_speed); + offset[1] = offsetof(struct smbios_type_4, current_speed); + expect_speed[0] = data->smbios_cpu_max_speed ? : 2000; + expect_speed[1] = data->smbios_cpu_curr_speed ? : 2000; + + for (i = 0; i < 2; i++) { + real = qtest_readw(data->qts, addr + offset[i]); + if (real != expect_speed[i]) { + fprintf(stderr, "Unexpected SMBIOS CPU speed: real %u expect %u\n", + real, expect_speed[i]); + return false; + } + } + + return true; +} + static void test_smbios_structs(test_data *data) { DECLARE_BITMAP(struct_bitmap, SMBIOS_MAX_TYPE+1) = { 0 }; @@ -585,6 +612,10 @@ static void test_smbios_structs(test_data *data) } set_bit(type, struct_bitmap); + if (type == 4) { + g_assert(smbios_cpu_test(data, addr)); + } + /* seek to end of unformatted string area of this struct ("\0\0") */ prv = crt = 1; while (prv || crt) { @@ -719,6 +750,11 @@ static void test_acpi_q35_tcg(void) data.required_struct_types_len = ARRAY_SIZE(base_required_struct_types); test_acpi_one(NULL, &data); free_test_data(&data); + + data.smbios_cpu_max_speed = 3000; + data.smbios_cpu_curr_speed = 2600; + test_acpi_one("-smbios type=4,max-speed=3000,current-speed=2600", &data); + free_test_data(&data); } static void test_acpi_q35_tcg_bridge(void) @@ -1084,6 +1120,12 @@ static void test_acpi_virt_tcg(void) test_acpi_one("-cpu cortex-a57", &data); free_test_data(&data); + + data.smbios_cpu_max_speed = 2900; + data.smbios_cpu_curr_speed = 2700; + test_acpi_one("-cpu cortex-a57 " + "-smbios type=4,max-speed=2900,current-speed=2700", &data); + free_test_data(&data); } int main(int argc, char *argv[]) |