diff --git a/main.py b/main.py index 3061e5a7aeb853ff91c59703d4a6458ab31b2227..decdb6b7908df816b6422b0490572296b9ec2a55 100644 --- a/main.py +++ b/main.py @@ -445,7 +445,7 @@ def setup_parser(): out = argparse.ArgumentParser() # # cluster version folder_given = "approach_empty_non_empty" - save_given = "approach_empty_non_empty_cpu" + save_given = "approach_empty_non_empty_new" out.add_argument('--get_vocabulary', default=f'/scratch/grzonkow/{folder_given}/vocabulary_{folder_given}.pkl', type=str, help="Path to load vocabulary") out.add_argument('--save_vocabulary', default=f'/scratch/grzonkow/{save_given}/vocabulary_new_{folder_given}.pkl', type=str, help="Path to safe vocabulary") @@ -880,13 +880,21 @@ if __name__ == '__main__': # if val_loss < best_val_loss: # best_val_loss = val_loss # torch.save(model.state_dict(), best_model_params_path) + elapsed = time.time() - epoch_start_time + # save model per epoch + torch.save(danceformer.state_dict(), f'{model_params_path}_epoch_{epoch}.pt') + # print epoch that ended, elapsed time + print('-' * 89) + print(f'| end of epoch {epoch:3d} | time: {elapsed:5.2f}s | ') + # f'valid loss {val_loss:5.2f} | valid ppl {val_ppl:8.2f}') + def open_txt_file(feature_name): with open(f"{details_dir}{feature_name}.txt", 'r') as fp: feature_pre = fp.read() feature_pre = feature_pre.split("\n") - feature_name = [float(t) for t in feature_pre[:-1]] + feature_name = [float(t) for t in feature_pre[:-1] if t != ''] return feature_name # load written data for different features and transform them in correct format @@ -914,13 +922,6 @@ if __name__ == '__main__': #greater_ratio_values = greater_ratio_values_all - elapsed = time.time() - epoch_start_time - # save model per epoch - torch.save(danceformer.state_dict(), f'{model_params_path}_epoch_{epoch}.pt') - # print epoch that ended, elapsed time - print('-' * 89) - print(f'| end of epoch {epoch:3d} | time: {elapsed:5.2f}s | ') - # f'valid loss {val_loss:5.2f} | valid ppl {val_ppl:8.2f}') # 
print status of features print(f"Last loss: {avg_loss}") diff --git a/model_run.sbatch b/model_run.sbatch index 53adafac796ad7d8680b41deb960b84697e2674f..ee2a9c9e1b62010a1b27b1bdc3dce5ad565fec6c 100644 --- a/model_run.sbatch +++ b/model_run.sbatch @@ -1,13 +1,14 @@ #!/bin/bash -#SBATCH -t 7-00:00:00 # time limit set to 1 week, 1 day 1-00:00:00 +#SBATCH -t 9-12:00:00 # time limit set to 9 days 12 hours (D-HH:MM:SS), 1 day 1-00:00:00 #SBATCH -J M_n_em_non_em # the job name #SBATCH --mail-type=END,FAIL,TIME_LIMIT # send notification emails #SBATCH -n 5 # use 5 tasks #SBATCH --cpus-per-task=1 # use 1 thread per taks -#SBATCH --mem-per-cpu=16G +#SBATCH --gpus=V100:1 # request 1 Volta V100 GPU +#SBATCH --partition=informatik-mind # run on one of our DGX servers #SBATCH -N 1 # request slots on 1 node -#SBATCH --output=/scratch/grzonkow/approach_empty_non_empty_cpu/model.txt # capture output -#SBATCH --error=/scratch/grzonkow/approach_empty_non_empty_cpu/err.txt # and error streams +#SBATCH --output=/scratch/grzonkow/approach_empty_non_empty_new/model.txt # capture output +#SBATCH --error=/scratch/grzonkow/approach_empty_non_empty_new/err.txt # and error streams