Spaces:

GroNLP
/

neural-acoustic-distance

Running

App Files Community

wietsedv committed on Apr 21, 2022

Commit

e7f4530

1 Parent(s): 39a0f1b

Update neural_acoustic_distance.py

Browse files

Files changed (1) hide show

neural_acoustic_distance.py +24 -24

neural_acoustic_distance.py CHANGED Viewed

@@ -112,11 +112,11 @@ def main():
     st.write(
         "This tool visualizes pronunciation differences between two recordings of the same word. The two recordings have to be wave files containing a single spoken word. \n\n\
-    Choose any wav2vec 2.0 compatible model identifier on the [Hugging Face Model Hub](https://huggingface.co/models?filter=wav2vec2) and select the output layer you want to use.\n\n\
-    To upload your own recordings select 'custom upload' in the audio file selection step. The first recording is put on the x-axis of the plot and the second one will be the reference recording for computing distance.\n\
-    You should already see an example plot of two sample recordings.\n\n\
-    This visualization tool is part of [neural representations for modeling variation in speech](https://doi.org/10.1016/j.wocn.2022.101137). \n\
-    Please see our paper for further details.")
     st.subheader("Model selection:")
@@ -208,28 +208,28 @@ def main():
         plt_id = randrange(0, 10)
         plt.savefig("./output/plot" + str(plt_id) + ".pdf")
         st.pyplot(fig)
-        print('7. Plot filled', datetime.now().strftime('%d-%m-%Y %H:%M:%S')) # test
-        if os.path.isfile("./output/plot.pdf"):
-            st.caption(" Visualization of neural acoustic distances\
-            per frame (based on wav2vec 2.0) with the pronunciation of\
-            the first filename on the x-axis and distances to the pronunciation\
-            of second filename on the y-axis. The horizontal line represents\
-            the global distance value (i.e. the average of all individual frames).\
-            The blue continuous line represents the moving average distance based on 9 frames,\
-            corresponding to 180ms. As a result of the moving average, the blue line does not cover the entire duration of\
-            the sample. Larger bullet sizes indicate that multiple\
-            frames in the pronunciation on the y-axis are aligned to a single frame in the pronunciation on the x-axis.")
-        with open("./output/plot.pdf", "rb") as file:
-            btn = st.download_button(label="Download plot", data=file, file_name="plot.pdf", mime="image/pdf")
-        print('8. End', datetime.now().strftime('%d-%m-%Y %H:%M:%S')) # test
-        print(f"9. RAM used: {psutil.Process().memory_info().rss / (1024 * 1024):.2f} MB") # test
 main()
 for name in dir():
     if not name.startswith('_'):
         del globals()[name]

     st.write(
         "This tool visualizes pronunciation differences between two recordings of the same word. The two recordings have to be wave files containing a single spoken word. \n\n\
+Choose any wav2vec 2.0 compatible model identifier on the [Hugging Face Model Hub](https://huggingface.co/models?filter=wav2vec2) and select the output layer you want to use.\n\n\
+To upload your own recordings select 'custom upload' in the audio file selection step. The first recording is put on the x-axis of the plot and the second one will be the reference recording for computing distance.\n\
+You should already see an example plot of two sample recordings.\n\n\
+This visualization tool is part of [neural representations for modeling variation in speech](https://doi.org/10.1016/j.wocn.2022.101137). \n\
+Please see our paper for further details.")
     st.subheader("Model selection:")
         plt_id = randrange(0, 10)
         plt.savefig("./output/plot" + str(plt_id) + ".pdf")
         st.pyplot(fig)
 main()
+print('7. Plot filled', datetime.now().strftime('%d-%m-%Y %H:%M:%S')) # test
+if os.path.isfile("./output/plot.pdf"):
+    st.caption(" Visualization of neural acoustic distances\
+    per frame (based on wav2vec 2.0) with the pronunciation of\
+    the first filename on the x-axis and distances to the pronunciation\
+    of second filename on the y-axis. The horizontal line represents\
+    the global distance value (i.e. the average of all individual frames).\
+    The blue continuous line represents the moving average distance based on 9 frames,\
+    corresponding to 180ms. As a result of the moving average, the blue line does not cover the entire duration of\
+    the sample. Larger bullet sizes indicate that multiple\
+    frames in the pronunciation on the y-axis are aligned to a single frame in the pronunciation on the x-axis.")
+with open("./output/plot.pdf", "rb") as file:
+    btn = st.download_button(label="Download plot", data=file, file_name="plot.pdf", mime="image/pdf")
+print('8. End', datetime.now().strftime('%d-%m-%Y %H:%M:%S')) # test
+print(f"9. RAM used: {psutil.Process().memory_info().rss / (1024 * 1024):.2f} MB") # test
 for name in dir():
     if not name.startswith('_'):
         del globals()[name]