diff --git a/load_vocab.py b/load_vocab.py index 82ca400a03decf6405adbf5d12042d8a03471c9f..5a4f1f921636124a367d6c9ff8cbc1822b010e0f 100644 --- a/load_vocab.py +++ b/load_vocab.py @@ -4,6 +4,8 @@ import simfile import os from io import StringIO from simfile.notes import NoteData +from simfile.notes.timed import time_notes +from simfile.timing import TimingData def filter_whitespaces(notes): @@ -24,9 +26,9 @@ def index_tokens(token_sequence): def built_chart_vocabulary(song_dirs): vocabulary_set = set() - cols = 4 + cols_init = 4 # once add zero string - note_string = ["0"] * cols + note_string = ["0"] * cols_init notedata_new = StringIO() notedata_new.write("".join(note_string)) # print(notedata_new.getvalue()) @@ -46,57 +48,35 @@ def built_chart_vocabulary(song_dirs): charts = sm_file.charts # print("Built vocabulary") for chart in charts: - # print(chart.notes) - # notes ist string - #print(type(chart.notes)) - # from notes - notedata = NoteData(chart) - notes = NoteData.from_notes(notedata, cols) - new_notes = [] - - - for i, note in enumerate(notes): - note_string = ["0"] * cols - for j, note_2 in enumerate(notes): - if note.beat == note_2.beat: - note_string[note.column] = str(note) - note_string[note_2.column] = str(note_2) - if note_string == ["0"] * cols: - note_string[note.column] = str(note) - notedata_new = StringIO() - notedata_new.write("".join(note_string)) - #print(notedata_new.getvalue()) - new_notes.append(notedata_new.getvalue()) - - #for note in notes: - # new_notes.append(str(note)) - #print(notes) - #print(str(notes)) - #print(new_notes) - vocabulary_set.update(set(new_notes)) - #notes = NoteData.from_notes(notedata, cols) - #notes = str(notes) - #notes = chart.notes.replace('\n,\n', '\n') - #notes = notes.replace(',', '') - # print(chart.notes) - #notes = filter_whitespaces(notes) - #notes = notes.split("\n") - # print(set(notes)) - #vocabulary_set.update(set(notes)) - - # update vocabulary set if there are whitespaces and length not 4 - # updated_vocabulary_set = set() - # for elem in vocabuary_set: - # if len(elem) != 4: - # elem = re.sub(r"\s+", "", elem, flags=re.UNICODE) - # - # if len(elem) == 4: - # updated_vocabulary_set.update([elem]) + # notes is string + note_data = NoteData(chart) + timing_data = TimingData(sm_file) + cols = note_data.columns + + # dictionary + tmp = dict() + for timed_note in time_notes(note_data, timing_data): + if timed_note.time in tmp.keys(): + tmp[timed_note.time] = tmp[timed_note.time][:timed_note.note.column] + str(timed_note.note) + \ + tmp[timed_note.time][timed_note.note.column + 1:] + else: + note_string = ["0"] * cols + note_string[timed_note.note.column] = str(timed_note.note) + note_string_new = StringIO() + note_string_new.write("".join(note_string)) + tmp[timed_note.time] = note_string_new.getvalue() + + vocabulary_set.update(tmp.values()) + + # alternative dictionary + #tmp = dict() + #for i, note in enumerate(notes): + # tmp[note.beat].append(note) + + print("----------------------------------") print("Built Vocabulary:") - #print(updated_vocabulary_set) print(vocabulary_set) - #indexed_vocabulary = index_tokens(updated_vocabulary_set) indexed_vocabulary = index_tokens(vocabulary_set) print("Indexing:") print(indexed_vocabulary) diff --git a/vocabulary_run.sbatch b/vocabulary_run.sbatch index 37555ecbbf76a36fe4af66c6e1d353d5eecf341a..a7e7d8312334a2ead5ff9940446340714a336cbb 100644 --- a/vocabulary_run.sbatch +++ b/vocabulary_run.sbatch @@ -1,14 +1,13 @@ #!/bin/bash -#SBATCH -t 1-00:00:00 # time limit set to 1 week +#SBATCH -t 0-00:10:00 # time limit set to 10 minutes, 1 day: 1-00:00:00 #SBATCH --mem=1G # reserve 1GB of memory -#SBATCH -J Load_Vocabulary # the job name +#SBATCH -J Load_Vocabulary # the job name #SBATCH --mail-type=END,FAIL,TIME_LIMIT # send notification emails #SBATCH -n 5 # use 5 tasks -#SBATCH --cpus-per-task=1 # use 1 thread per taks +#SBATCH --cpus-per-task=1 # use 1 thread per task #SBATCH -N 1 # request slots on 1 node -#SBATCH --partition=informatik-mind # run on one of our DGX servers -#SBATCH --output=/scratch/grzonkow/vocabulary_3.txt # capture output -#SBATCH --error=/scratch/grzonkow/err_3.txt # and error streams +#SBATCH --output=/scratch/grzonkow/vocabulary.txt # capture output +#SBATCH --error=/scratch/grzonkow/err.txt # and error streams module load anaconda3/latest . $ANACONDA_HOME/etc/profile.d/conda.sh