PyTorch Lightning
Quick Example
import pytorch_lightning as pl
import json
class LitModel(pl.LightningModule):
def __init__(self):
super().__init__()
# Your model setup...
def training_step(self, batch, batch_idx):
# Your training logic...
loss = self.compute_loss(batch)
self.log("train_loss", loss)
return loss
def validation_step(self, batch, batch_idx):
# Your validation logic...
loss = self.compute_loss(batch)
acc = self.compute_accuracy(batch)
self.log("val_loss", loss)
self.log("val_acc", acc)
def on_train_epoch_end(self):
# Log to Valohai after each epoch
metrics = self.trainer.callback_metrics
print(
json.dumps(
{
"epoch": self.current_epoch,
"train_loss": float(metrics.get("train_loss", 0)),
"val_loss": float(metrics.get("val_loss", 0)),
"val_acc": float(metrics.get("val_acc", 0)),
},
),
)Why Use Hooks?
Complete Working Example
valohai.yaml Configuration
Alternative: Using Callbacks
Logging Custom Metrics
Available Hooks
Best Practices
Convert Tensors to Python Types
Use Consistent Metric Names
Don't Log Every Batch
Log Hyperparameters at Start
Common Issues
Metrics Not Available in Hook
Tensor Serialization Errors
Example Project
Next Steps
Last updated
Was this helpful?
