# List all Filestore instances
gcloud filestore instances list
# Describe one instance, printing only the first entry of its
# networks.ipAddresses field (the --format projection drops everything else).
# INSTANCE_NAME and ZONE are placeholders: substitute the instance's name
# and its location before running.
gcloud filestore instances describe INSTANCE_NAME \
--location=ZONE \
--format="value(networks.ipAddresses[0])"
import os
import sys
# Path at which the Filestore share is expected to be mounted.
FILESTORE_PATH = "/mnt/filestore-data/"

# Verify mount is accessible. Nothing below can work without it, so print
# the likely causes and stop with a non-zero exit status.
if not os.path.exists(FILESTORE_PATH):
    print(f"ERROR: Filestore mount {FILESTORE_PATH} not accessible")
    print("Possible causes:")
    print(" - Wrong IP address in mount configuration")
    print(" - VPC connectivity issue")
    print(" - Filestore instance not running")
    sys.exit(1)

# Verify expected data exists. A missing "datasets" directory is only a
# warning: list what the mount does contain to help diagnose a wrong share
# or path, then carry on.
expected_dir = os.path.join(FILESTORE_PATH, "datasets")
if not os.path.exists(expected_dir):
    print(f"WARNING: Expected directory not found: {expected_dir}")
    print(f"Available: {os.listdir(FILESTORE_PATH)}")

# Reached whenever the mount path itself exists (with or without the warning).
print(f"Filestore mount verified: {FILESTORE_PATH}")
# Today: load whatever is currently on the Filestore mount and train on it.
# (load_from_filestore and train are not defined in this excerpt.)
data = load_from_filestore("/mnt/filestore-data/")
model = train(data)
# Next week: someone updates the data on Filestore.
# Retraining then gives different results, and the original model
# can no longer be reproduced.
# Read the data as it exists on the Filestore mount right now.
data = load_from_filestore("/mnt/filestore-data/")

# Write that exact state out as a snapshot file under /valohai/outputs/.
data.to_csv("/valohai/outputs/training_snapshot.csv")

# Dataset-version metadata, keyed by the snapshot's file name: it associates
# the file with the dataset version URI dataset://training-data/2024-01-15.
# NOTE(review): this dict is only defined here, never written anywhere in
# this excerpt -- presumably it still has to be saved as output metadata
# for the dataset version to be created; confirm against the Valohai docs.
metadata = {
    "training_snapshot.csv": {
        "valohai.dataset-versions": [
            {"uri": "dataset://training-data/2024-01-15"},
        ],
    },
}

# The next execution trains on the versioned snapshot instead of the live
# mount, so the run can be reproduced exactly at any time.