Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions aptos-move/aptos-vm-types/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ move-core-types = { workspace = true }
move-vm-runtime = { workspace = true }
move-vm-types = { workspace = true }
rand = { workspace = true }
rustc-hash = { workspace = true }
serde = { workspace = true }
triomphe = { workspace = true }

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

pub mod code_storage;
pub mod module_storage;
pub mod read_recording;

mod state_view_adapter;
pub use state_view_adapter::{AptosCodeStorageAdapter, AsAptosCodeStorage};
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,12 @@ pub trait AptosModuleStorage: ModuleStorage {
address: &AccountAddress,
module_name: &IdentStr,
) -> PartialVMResult<Option<StateValueMetadata>>;

/// Records `(address, module_name)` as a module read by the current transaction, for hot
/// state promotion. The default is a no-op; only the read-recording storage overrides it.
///
/// Lets callers record a script's declared dependencies from the loaded script, keeping
/// the recorded read set independent of the verified-script cache, whose warmth depends on
/// the execution schedule and must not influence the consensus-visible promoted set.
fn record_module_read(&self, _address: &AccountAddress, _module_name: &IdentStr) {}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
// Copyright (c) Aptos Foundation
// Licensed pursuant to the Innovation-Enabling Source Code License, available at https://github.com/aptos-labs/aptos-core/blob/main/LICENSE

// NOTE(review): presumably needed because the stacked `#[delegate(...)]` attributes on the
// delegation impls in this file repeat arguments and trip this lint — confirm before removing.
#![allow(clippy::duplicated_attributes)]

use crate::{
module_and_script_storage::module_storage::AptosModuleStorage,
resolver::{ambassador_impl_BlockSynchronizationKillSwitch, BlockSynchronizationKillSwitch},
};
use ambassador::delegate_to_methods;
use aptos_types::state_store::{state_key::StateKey, state_value::StateValueMetadata};
use bytes::Bytes;
use move_binary_format::{
errors::{PartialVMResult, VMResult},
file_format::CompiledScript,
CompiledModule,
};
use move_core_types::{
account_address::AccountAddress, identifier::IdentStr, language_storage::ModuleId,
};
use move_vm_runtime::{
ambassador_impl_LayoutCache, ambassador_impl_WithRuntimeEnvironment, LayoutCache,
LayoutCacheEntry, Module, ModuleStorage, RuntimeEnvironment, Script, StructKey,
WithRuntimeEnvironment,
};
use move_vm_types::code::{ambassador_impl_ScriptCache, Code, ScriptCache};
use rustc_hash::FxHashSet;
use std::{cell::RefCell, sync::Arc};

/// Wraps a code storage and records every module the VM fetches through it, so that module
/// accesses become part of the transaction's observed read set (the basis for hot state
/// promotion).
///
/// Scripts are not state items, so script cache accesses are not recorded.
///
/// The recording state is kept in `RefCell`s so that reads can be recorded through `&self`.
pub struct ReadRecordingCodeStorage<'a, C> {
/// The wrapped code storage; it is borrowed, not owned, for the lifetime `'a`.
code_storage: &'a C,
/// State keys of the modules recorded so far. A set, so each key is stored at most once.
module_reads: RefCell<FxHashSet<StateKey>>,
/// The previously recorded `(address, name)`. Recording sits above the module cache, and one
/// module resolution makes several back-to-back fetches of the same key (a load charges the
/// module's size then fetches it; the module is re-fetched to resolve the types it defines),
/// so skipping such a run here avoids a set lookup and key construction per cheap cache hit.
/// A `String` buffer rather than an `Identifier` so a module switch reuses the allocation.
last_recorded: RefCell<(AccountAddress, String)>,
}

impl<'a, C> ReadRecordingCodeStorage<'a, C> {
    /// Creates a recorder around `code_storage` with nothing recorded yet.
    pub fn new(code_storage: &'a C) -> Self {
        // Even a trivial transaction touches 10+ framework modules through its prologue
        // and epilogue, so reserve room for the typical count up front and skip the rehashes.
        const TYPICAL_MODULE_READS: usize = 24;

        let module_reads =
            FxHashSet::with_capacity_and_hasher(TYPICAL_MODULE_READS, Default::default());
        // Module names are never empty, so the empty name stands for "nothing recorded yet".
        let nothing_recorded = (AccountAddress::ZERO, String::new());

        Self {
            code_storage,
            module_reads: RefCell::new(module_reads),
            last_recorded: RefCell::new(nothing_recorded),
        }
    }

    /// Consumes the recorder and hands back the state keys of all modules recorded so far,
    /// deduplicated by key.
    pub fn into_recorded_reads(self) -> FxHashSet<StateKey> {
        self.module_reads.into_inner()
    }

    /// Adds the module `(address, module_name)` to the recorded read set.
    ///
    /// A call that repeats the immediately preceding `(address, module_name)` returns early.
    /// Only an exact match is skipped, so the resulting set is the same as if every call had
    /// gone through the set insertion.
    #[inline]
    fn record(&self, address: &AccountAddress, module_name: &IdentStr) {
        let name = module_name.as_str();

        // The shared borrow is confined to this block so that it is released before the
        // mutable borrow of `last_recorded` further down.
        let repeats_previous = {
            let previous = self.last_recorded.borrow();
            previous.0 == *address && previous.1.as_str() == name
        };
        if repeats_previous {
            return;
        }

        self.module_reads
            .borrow_mut()
            .insert(StateKey::module(address, module_name));

        // Remember this module for the next call, reusing the existing name buffer.
        let mut previous = self.last_recorded.borrow_mut();
        let (previous_address, previous_name) = &mut *previous;
        *previous_address = *address;
        previous_name.clear();
        previous_name.push_str(name);
    }
}

// Forwards `WithRuntimeEnvironment`, `LayoutCache` and `BlockSynchronizationKillSwitch` to the
// wrapped storage via `inner`. Each trait is only implemented when `C` itself implements it
// (see the `where` argument of each `#[delegate(...)]`).
#[delegate_to_methods]
#[delegate(
WithRuntimeEnvironment,
target_ref = "inner",
where = "C: WithRuntimeEnvironment"
)]
#[delegate(LayoutCache, target_ref = "inner", where = "C: LayoutCache")]
#[delegate(
BlockSynchronizationKillSwitch,
target_ref = "inner",
where = "C: BlockSynchronizationKillSwitch"
)]
impl<C> ReadRecordingCodeStorage<'_, C> {
/// Returns the wrapped code storage.
///
/// This is the delegation target named by `target_ref = "inner"` in the attributes above.
fn inner(&self) -> &C {
self.code_storage
}
}

impl<C: ModuleStorage> ModuleStorage for ReadRecordingCodeStorage<'_, C> {
/// Records `(address, module_name)` as read, then forwards the existence check to the
/// wrapped storage and returns its result unchanged.
#[inline]
fn unmetered_check_module_exists(
    &self,
    address: &AccountAddress,
    module_name: &IdentStr,
) -> VMResult<bool> {
    let wrapped = self.code_storage;
    // Recording happens first, so the read is captured whatever the wrapped call returns.
    self.record(address, module_name);
    wrapped.unmetered_check_module_exists(address, module_name)
}

/// Records `(address, module_name)` as read, then forwards the bytes lookup to the wrapped
/// storage and returns its result unchanged.
#[inline]
fn unmetered_get_module_bytes(
    &self,
    address: &AccountAddress,
    module_name: &IdentStr,
) -> VMResult<Option<Bytes>> {
    let wrapped = self.code_storage;
    // Recording happens first, so the read is captured whatever the wrapped call returns.
    self.record(address, module_name);
    wrapped.unmetered_get_module_bytes(address, module_name)
}

/// Records `(address, module_name)` as read, then forwards the hash-and-size lookup to the
/// wrapped storage and returns its result unchanged.
#[inline]
fn unmetered_get_module_hash_and_size(
    &self,
    address: &AccountAddress,
    module_name: &IdentStr,
) -> VMResult<Option<([u8; 32], usize)>> {
    let wrapped = self.code_storage;
    // Recording happens first, so the read is captured whatever the wrapped call returns.
    self.record(address, module_name);
    wrapped.unmetered_get_module_hash_and_size(address, module_name)
}

/// Records `(address, module_name)` as read, then forwards the size lookup to the wrapped
/// storage and returns its result unchanged.
#[inline]
fn unmetered_get_module_size(
    &self,
    address: &AccountAddress,
    module_name: &IdentStr,
) -> VMResult<Option<usize>> {
    let wrapped = self.code_storage;
    // Recording happens first, so the read is captured whatever the wrapped call returns.
    self.record(address, module_name);
    wrapped.unmetered_get_module_size(address, module_name)
}

/// Records `(address, module_name)` as read, then forwards the deserialized-module lookup to
/// the wrapped storage and returns its result unchanged.
#[inline]
fn unmetered_get_deserialized_module(
    &self,
    address: &AccountAddress,
    module_name: &IdentStr,
) -> VMResult<Option<Arc<CompiledModule>>> {
    let wrapped = self.code_storage;
    // Recording happens first, so the read is captured whatever the wrapped call returns.
    self.record(address, module_name);
    wrapped.unmetered_get_deserialized_module(address, module_name)
}

#[inline]
fn unmetered_get_eagerly_verified_module(
&self,
address: &AccountAddress,
module_name: &IdentStr,
) -> VMResult<Option<Arc<Module>>> {
self.record(address, module_name);
self.code_storage
.unmetered_get_eagerly_verified_module(address, module_name)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Low: transitive module reads are still missed when eager loading is used.

unmetered_get_eagerly_verified_module() records only the top-level module before delegating to the inner storage. On the execute_function_bypass_visibility() paths that still use LegacyLoaderConfig::unmetered(), the eager Move loader recursively verifies transitive dependencies inside that inner ModuleStorage, so those dependency module fetches never pass back through this wrapper. In configurations with lazy loading disabled, transactions can therefore omit framework dependency modules that the VM actually touched from the recorded read set, and those modules will never be considered for hot-state promotion.

}

/// Records the module identified by `module_id` as read, then forwards the lazily-verified
/// module lookup to the wrapped storage and returns its result unchanged.
#[inline]
fn unmetered_get_lazily_verified_module(
    &self,
    module_id: &ModuleId,
) -> VMResult<Option<Arc<Module>>> {
    let wrapped = self.code_storage;
    // The read is recorded under the id's (address, name) pair, like every other accessor.
    let (address, module_name) = (module_id.address(), module_id.name());
    self.record(address, module_name);
    wrapped.unmetered_get_lazily_verified_module(module_id)
}

/// Records `(address, module_name)` as read, then forwards the skip-verification lookup to
/// the wrapped storage and returns its result unchanged. Only compiled under `cfg(fuzzing)`.
#[cfg(fuzzing)]
#[inline]
fn unmetered_get_module_skip_verification(
    &self,
    address: &AccountAddress,
    module_name: &IdentStr,
) -> VMResult<Option<Arc<Module>>> {
    let wrapped = self.code_storage;
    // Recording happens first, so the read is captured whatever the wrapped call returns.
    self.record(address, module_name);
    wrapped.unmetered_get_module_skip_verification(address, module_name)
}
}

impl<C> AptosModuleStorage for ReadRecordingCodeStorage<'_, C>
where
    C: AptosModuleStorage,
{
    /// Records `(address, module_name)` as read, then forwards the state-value-metadata
    /// lookup to the wrapped storage and returns its result unchanged.
    #[inline]
    fn unmetered_get_module_state_value_metadata(
        &self,
        address: &AccountAddress,
        module_name: &IdentStr,
    ) -> PartialVMResult<Option<StateValueMetadata>> {
        let wrapped = self.code_storage;
        // Recording happens first, so the read is captured whatever the wrapped call returns.
        self.record(address, module_name);
        wrapped.unmetered_get_module_state_value_metadata(address, module_name)
    }

    /// Records `(address, module_name)` in this wrapper's read set without touching the
    /// wrapped storage.
    #[inline]
    fn record_module_read(&self, address: &AccountAddress, module_name: &IdentStr) {
        Self::record(self, address, module_name)
    }
}

// Forwards `ScriptCache` to the wrapped storage via `as_script_cache`. Nothing is recorded on
// this path.
#[delegate_to_methods]
#[delegate(ScriptCache, target_ref = "as_script_cache")]
impl<C> ReadRecordingCodeStorage<'_, C>
where
C: ScriptCache<Key = [u8; 32], Deserialized = CompiledScript, Verified = Script>,
{
/// Returns the wrapped script cache.
///
/// The wrapped storage is returned as a trait object; this is the delegation target named by
/// `target_ref = "as_script_cache"` above.
fn as_script_cache(
&self,
) -> &dyn ScriptCache<Key = [u8; 32], Deserialized = CompiledScript, Verified = Script> {
self.code_storage
}
}
26 changes: 26 additions & 0 deletions aptos-move/aptos-vm-types/src/output.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ use move_core_types::{
};
use move_vm_runtime::execution_tracing::Trace;
use move_vm_types::delayed_values::delayed_field_id::DelayedFieldID;
use rustc_hash::FxHashSet;
use std::{collections::BTreeMap, mem};

/// Output produced by the VM after executing a transaction.
Expand Down Expand Up @@ -261,3 +262,28 @@ impl VMOutput {
self.into_transaction_output()
}
}

/// A transaction's read set, used for hot-state promotion. Unordered at the
/// per-transaction level; ordering is imposed later when aggregating per-block.
///
/// Data and module keys are kept as recorded. Both sides are already deduplicated and
/// they can never contain the same key (module and data state keys are disjoint), so
/// merging them into one set would only re-hash every module key.
#[derive(Clone, Debug, Default)]
pub struct UnorderedReadSet {
/// Keys of the data (non-module) reads, as handed to `new`.
data_keys: FxHashSet<StateKey>,
/// Keys of the module reads, as handed to `new`.
module_keys: FxHashSet<StateKey>,
}

impl UnorderedReadSet {
pub fn new(data_keys: FxHashSet<StateKey>, module_keys: FxHashSet<StateKey>) -> Self {
Self {
data_keys,
module_keys,
}
}

pub fn iter(&self) -> impl Iterator<Item = &StateKey> {
self.data_keys.iter().chain(self.module_keys.iter())
}
}
2 changes: 2 additions & 0 deletions aptos-move/aptos-vm-types/src/resolver.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Copyright (c) Aptos Foundation
// Licensed pursuant to the Innovation-Enabling Source Code License, available at https://github.com/aptos-labs/aptos-core/blob/main/LICENSE

use ambassador::delegatable_trait;
use aptos_aggregator::resolver::{TAggregatorV1View, TDelayedFieldView};
use aptos_types::{
serde_helper::bcs_utils::size_u32_as_uleb128,
Expand All @@ -22,6 +23,7 @@ use std::collections::{BTreeMap, HashMap};
/// Allows requesting an immediate interrupt to ongoing transaction execution. For example, this
/// allows an early return from a useless speculative execution when block execution has already
/// halted (e.g. due to gas limit, committing only a block prefix).
///
/// Marked `#[delegatable_trait]` so that wrapper types can forward this trait to an inner value
/// with `ambassador`'s `#[delegate(...)]` attribute.
#[delegatable_trait]
pub trait BlockSynchronizationKillSwitch {
/// Reports whether an interrupt of the ongoing transaction execution has been requested.
fn interrupt_requested(&self) -> bool;
}
Expand Down
1 change: 1 addition & 0 deletions aptos-move/aptos-vm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ once_cell = { workspace = true }
ouroboros = { workspace = true }
rand = { workspace = true }
rayon = { workspace = true }
rustc-hash = { workspace = true }
serde = { workspace = true }
triomphe = { workspace = true }

Expand Down
11 changes: 10 additions & 1 deletion aptos-move/aptos-vm/src/aptos_vm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ use aptos_vm_types::{
use claims::assert_err;
use fail::fail_point;
use move_binary_format::{
access::ModuleAccess,
access::{ModuleAccess, ScriptAccess},
compatibility::Compatibility,
deserializer::DeserializerConfig,
errors::{Location, PartialVMError, PartialVMResult, VMError, VMResult},
Expand Down Expand Up @@ -986,6 +986,15 @@ impl AptosVM {
self.reject_unstable_bytecode_for_script(script)?;
event_validation::verify_no_event_emission_in_compiled_script(script)?;

// Record the script's declared module dependencies as reads. These are a function of
// the script bytecode, so recording them here keeps the read set independent of the
// verified-script cache: its warmth depends on the execution schedule (parallel
// interleaving, aborts), so deriving these reads from cache-gated dependency fetches
// would make the hot-state promotion set nondeterministic across nodes.
for (address, module_name) in script.immediate_dependencies_iter() {
code_storage.record_module_read(address, module_name);
}

let args = dispatch_transaction_arg_validation!(
session,
&loader,
Expand Down
Loading
Loading