write all data on training to training log

This commit is contained in:
2025-12-25 02:07:49 +00:00
parent 921e24b451
commit 4f500e8b4c

View File

@@ -176,18 +176,17 @@ def training_entry():
loss_fn.backward() loss_fn.backward()
opt.step() opt.step()
if step % TRAIN_PROGRESS == 0: with torch.no_grad():
with torch.no_grad(): pred = (torch.sigmoid(logits) > 0.5).float()
pred = (torch.sigmoid(logits) > 0.5).float() acc = (pred == y).float().mean().item()
acc = (pred == y).float().mean().item() tlog.write(f"step={step:5d} loss={loss_fn.item():.7f} acc={acc:.7f}\n")
tlog.write(f"step={step:5d} loss={loss_fn.item():.7f} acc={acc:.7f}\n")
# also print to normal text log occasionally to show some activity. # also print to normal text log occasionally to show some activity.
# every 100 steps check if its been longer than 5 seconds since we've updated the user # every 100 steps check if its been longer than 5 seconds since we've updated the user
if step % 100 == 0: if step % 100 == 0:
if (datetime.datetime.now() - last_ack).total_seconds() > 5: if (datetime.datetime.now() - last_ack).total_seconds() > 5:
log.info(f"still training... step={step} of {TRAIN_STEPS}") log.info(f"still training... step={step} of {TRAIN_STEPS}")
last_ack = datetime.datetime.now() last_ack = datetime.datetime.now()
training_end_time = datetime.datetime.now() training_end_time = datetime.datetime.now()
log.info(f"Training steps complete. Start time: {training_start_time} End time: {training_end_time}") log.info(f"Training steps complete. Start time: {training_start_time} End time: {training_end_time}")