dictabert-joint-phonikud-tts

Sleeping

App Files Community

Update app.py

by VRDate - opened Dec 5, 2025

base: refs/heads/main

←

from: refs/pr/2

Discussion Files changed

+81

-138

Files changed (1) hide show

app.py +81 -138

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 """
-Hebrew Unified NLP - Async Parallel Pipeline 2025.12.5
-======================================================
 Architecture:
     Text → pysbd Sentence Breaker → async.gather([process(s) for s in sentences]) → JSON Array
@@ -287,6 +287,82 @@ LABEL_TRANSLATIONS = {
         # Construct state
         "Construct": {"en": "Construct", "he": "סמיכות"},
         "Free": {"en": "Free", "he": "נפרד"}
     }
 }
@@ -982,143 +1058,10 @@ def display_ner_entities(entities: list):
 # Internationalization (i18n)
 # ============================================================================
-TRANSLATIONS = {
-    "he": {
-        "title": "🇮🇱 עיבוד שפה עברית מאוחד",
-        "subtitle": "צינור מקבילי אסינכרוני | שבירת משפטים חוצה-פלטפורמה | ארכיטקטורת Map-Reduce",
-        "settings": "⚙️ הגדרות",
-        "language": "🌐 שפה",
-        "compute_mst": "צור עץ תחבירי",
-        "generate_speech": "צור דיבור",
-        "tts_params": "הגדרות דיבור",
-        "en_voice": "🇬🇧 קול אנגלית",
-        "speed": "מהירות",
-        "noise": "רעש",
-        "noise_w": "רעש W",
-        "sentence_pause": "השהיה בין משפטים (שניות)",
-        "sentence_breaker": "מפצל משפטים",
-        "available_backends": "מנועים זמינים",
-        "load_example": "📚 טען דוגמה:",
-        "enter_text": "הזן טקסט (תמיכה במספר משפטים):",
-        "placeholder": "הקלד טקסט...",
-        "analyze": "🔍 נתח",
-        "processing": "מעבד משפטים במקביל...",
-        "enter_text_warning": "אנא הזן טקסט לניתוח.",
-        "sentences": "משפטים",
-        "processing_time": "זמן עיבוד",
-        "workers": "עובדים",
-        "breaker": "מפצל",
-        "sentence": "משפט",
-        "original": "מקור:",
-        "nikud": "ניקוד:",
-        "phonemes": "פונמות:",
-        "dep_tree": "🌳 עץ תלויות",
-        "all_trees": "🌳 כל העצים",
-        "sentence_json": "📄 JSON משפט",
-        "morphology": "🔠 מורפולוגיה",
-        "ner": "🏷️ ישויות",
-        "speech": "🔊 דיבור",
-        "no_speech": "אין דיבור - הפעל 'צור דיבור' בהגדרות",
-        "duration": "משך",
-        "json_output": "📄 JSON",
-        "full_json": "פלט JSON מלא",
-        "download_json": "⬇️ הורד JSON",
-        "no_entities": "אין ישויות",
-        "models_not_loaded": "המודלים לא נטענו.",
-        "token": "טוקן",
-        "lemma": "למה",
-        "pos": "חלק דיבר",
-        "prefixes": "תחיליות",
-        "head": "ראש",
-        "rel": "יחס",
-        "play_all": "נגן הכל",
-        "stop": "עצור",
-        "playing": "מנגן משפט",
-        "finished": "✓ סיום",
-        "stopped": "נעצר",
-        "download_merged_speech": "הורד קובץ דיבור ממוזג (WAV)",
-        "download_speech_files": "הורד קבצי דיבור (WAV)",
-        "breaking_sentences": "מפצל משפטים...",
-        "processing_sentence": "מעבד משפט",
-        "finalizing": "מסיים...",
-        "done": "✓ הסתיים",
-        "no_sentences": "לא נמצאו משפטים",
-        "loading_models": "טוען מודלים...",
-        "loading_dictabert": "טוען DictaBERT...",
-        "loading_phonikud": "טוען Phonikud...",
-        "loading_piper": "טוען Piper TTS...",
-    },
-    "en": {
-        "title": "🇮🇱 Hebrew Unified NLP",
-        "subtitle": "Async Parallel Pipeline | Cross-Platform Sentence Breaking | Map-Reduce Architecture",
-        "settings": "⚙️ Settings",
-        "language": "🌐 Language",
-        "compute_mst": "Create Syntax Tree",
-        "generate_speech": "Generate speech",
-        "tts_params": "TTS Parameters",
-        "en_voice": "🇬🇧 English Voice",
-        "speed": "Speed",
-        "noise": "Noise",
-        "noise_w": "Noise W",
-        "sentence_pause": "Pause between sentences (sec)",
-        "sentence_breaker": "Sentence Breaker",
-        "available_backends": "Available backends",
-        "load_example": "📚 Load Example:",
-        "enter_text": "Enter text (multiple sentences supported):",
-        "placeholder": "Enter text...",
-        "analyze": "🔍 Analyze",
-        "processing": "Processing sentences in parallel...",
-        "enter_text_warning": "Please enter some text to analyze.",
-        "sentences": "Sentences",
-        "processing_time": "Processing",
-        "workers": "Workers",
-        "breaker": "Breaker",
-        "sentence": "Sentence",
-        "original": "Original:",
-        "nikud": "Nikud:",
-        "phonemes": "Phonemes:",
-        "dep_tree": "🌳 Dependency Tree",
-        "all_trees": "🌳 All Trees",
-        "sentence_json": "📄 Sentence JSON",
-        "morphology": "🔠 Morphology",
-        "ner": "🏷️ Named Entities",
-        "speech": "🔊 Speech",
-        "no_speech": "No speech - enable 'Generate speech' in settings",
-        "duration": "Duration",
-        "json_output": "📄 JSON",
-        "full_json": "Full JSON Output",
-        "download_json": "⬇️ Download JSON",
-        "no_entities": "No named entities",
-        "models_not_loaded": "Models not loaded.",
-        "token": "Token",
-        "lemma": "Lemma",
-        "pos": "POS",
-        "prefixes": "Prefixes",
-        "head": "Head",
-        "rel": "Rel",
-        "play_all": "Play All",
-        "stop": "Stop",
-        "playing": "Playing sentence",
-        "finished": "✓ Finished",
-        "stopped": "Stopped",
-        "download_merged_speech": "Download merged speech file (WAV)",
-        "download_speech_files": "Download speech Files (WAV)",
-        "breaking_sentences": "Breaking sentences...",
-        "processing_sentence": "Processing sentence",
-        "finalizing": "Finalizing...",
-        "done": "✓ Done",
-        "no_sentences": "No sentences found",
-        "loading_models": "Loading models...",
-        "loading_dictabert": "Loading DictaBERT...",
-        "loading_phonikud": "Loading Phonikud...",
-        "loading_piper": "Loading Piper TTS...",
-    }
-}
 def t(key: str) -> str:
-    """Get translated string for current language"""
     lang = st.session_state.get('ui_lang', 'he')
-    return TRANSLATIONS.get(lang, TRANSLATIONS['en']).get(key, key)
 def is_rtl() -> bool:
     """Check if current language is RTL"""
@@ -1130,7 +1073,7 @@ def is_rtl() -> bool:
 # ============================================================================
 st.set_page_config(
-    page_title="Hebrew Unified NLP",
     page_icon="🇮🇱",
     layout="wide"
 )

 """
+Bilingual NLP + TTS - Nikud · Syntax · NER · Speech
+====================================================
 Architecture:
     Text → pysbd Sentence Breaker → async.gather([process(s) for s in sentences]) → JSON Array
         # Construct state
         "Construct": {"en": "Construct", "he": "סמיכות"},
         "Free": {"en": "Free", "he": "נפרד"}
+    },
+    "ui": {
+        # App header
+        "title": {"en": "🇮🇱🇬🇧 Bilingual NLP + TTS", "he": "🇮🇱🇬🇧 ניתוח שפה דו-לשוני + דיבור"},
+        "page_title": {"en": "🇮🇱🇬🇧 Bilingual NLP + TTS", "he": "🇮🇱🇬🇧 ניתוח שפה דו-לשוני + דיבור"},
+        "subtitle": {"en": "Nikud · Syntax · NER · Speech", "he": "ניקוד · תחביר · ישויות · דיבור"},
+        # Settings
+        "settings": {"en": "⚙️ Settings", "he": "⚙️ הגדרות"},
+        "language": {"en": "🌐 Language", "he": "🌐 שפה"},
+        "compute_mst": {"en": "Create Syntax Tree", "he": "צור עץ תחבירי"},
+        "generate_speech": {"en": "Generate speech", "he": "צור דיבור"},
+        "tts_params": {"en": "TTS Parameters", "he": "הגדרות דיבור"},
+        "en_voice": {"en": "🇬🇧 English Voice", "he": "🇬🇧 קול אנגלית"},
+        "speed": {"en": "Speed", "he": "מהירות"},
+        "noise": {"en": "Noise", "he": "רעש"},
+        "noise_w": {"en": "Noise W", "he": "רעש W"},
+        "sentence_pause": {"en": "Pause between sentences (sec)", "he": "השהיה בין משפטים (שניות)"},
+        "sentence_breaker": {"en": "Sentence Breaker", "he": "מפצל משפטים"},
+        "available_backends": {"en": "Available backends", "he": "מנועים זמינים"},
+        # Input
+        "load_example": {"en": "📚 Load Example:", "he": "📚 טען דוגמה:"},
+        "enter_text": {"en": "Enter text (multiple sentences supported):", "he": "הזן טקסט (תמיכה במספר משפטים):"},
+        "placeholder": {"en": "Enter text...", "he": "הקלד טקסט..."},
+        "analyze": {"en": "🔍 Analyze", "he": "🔍 נתח"},
+        "enter_text_warning": {"en": "Please enter some text to analyze.", "he": "אנא הזן טקסט לניתוח."},
+        # Processing
+        "processing": {"en": "Processing sentences in parallel...", "he": "מעבד משפטים במקביל..."},
+        "breaking_sentences": {"en": "Breaking sentences...", "he": "מפצל משפטים..."},
+        "processing_sentence": {"en": "Processing sentence", "he": "מעבד משפט"},
+        "finalizing": {"en": "Finalizing...", "he": "מסיים..."},
+        "done": {"en": "✓ Done", "he": "✓ הסתיים"},
+        "no_sentences": {"en": "No sentences found", "he": "לא נמצאו משפטים"},
+        # Metrics
+        "sentences": {"en": "Sentences", "he": "משפטים"},
+        "processing_time": {"en": "Processing", "he": "זמן עיבוד"},
+        "workers": {"en": "Workers", "he": "עובדים"},
+        "breaker": {"en": "Breaker", "he": "מפצל"},
+        # Results
+        "sentence": {"en": "Sentence", "he": "משפט"},
+        "original": {"en": "Original:", "he": "מקור:"},
+        "nikud": {"en": "Nikud:", "he": "ניקוד:"},
+        "phonemes": {"en": "Phonemes:", "he": "פונמות:"},
+        "dep_tree": {"en": "🌳 Dependency Tree", "he": "🌳 עץ תלויות"},
+        "all_trees": {"en": "🌳 All Trees", "he": "🌳 כל העצים"},
+        "sentence_json": {"en": "📄 Sentence JSON", "he": "📄 JSON משפט"},
+        "morphology": {"en": "🔠 Morphology", "he": "🔠 מורפולוגיה"},
+        "ner": {"en": "🏷️ Named Entities", "he": "🏷️ ישויות"},
+        "no_entities": {"en": "No named entities", "he": "אין ישויות"},
+        # Speech
+        "speech": {"en": "🔊 Speech", "he": "🔊 דיבור"},
+        "no_speech": {"en": "No speech - enable 'Generate speech' in settings", "he": "אין דיבור - הפעל 'צור דיבור' בהגדרות"},
+        "duration": {"en": "Duration", "he": "משך"},
+        "play_all": {"en": "Play All", "he": "נגן הכל"},
+        "stop": {"en": "Stop", "he": "עצור"},
+        "playing": {"en": "Playing sentence", "he": "מנגן משפט"},
+        "finished": {"en": "✓ Finished", "he": "✓ סיום"},
+        "stopped": {"en": "Stopped", "he": "נעצר"},
+        "download_merged_speech": {"en": "Download merged speech file (WAV)", "he": "הורד קובץ דיבור ממוזג (WAV)"},
+        "download_speech_files": {"en": "Download speech Files (WAV)", "he": "הורד קבצי דיבור (WAV)"},
+        # JSON
+        "json_output": {"en": "📄 JSON", "he": "📄 JSON"},
+        "full_json": {"en": "Full JSON Output", "he": "פלט JSON מלא"},
+        "download_json": {"en": "⬇️ Download JSON", "he": "⬇️ הורד JSON"},
+        # Table columns
+        "token": {"en": "Token", "he": "טוקן"},
+        "lemma": {"en": "Lemma", "he": "למה"},
+        "pos": {"en": "POS", "he": "חלק דיבר"},
+        "prefixes": {"en": "Prefixes", "he": "תחיליות"},
+        "head": {"en": "Head", "he": "ראש"},
+        "rel": {"en": "Rel", "he": "יחס"},
+        # Loading
+        "models_not_loaded": {"en": "Models not loaded.", "he": "המודלים לא נטענו."},
+        "loading_models": {"en": "Loading models...", "he": "טוען מודלים..."},
+        "loading_dictabert": {"en": "Loading DictaBERT...", "he": "טוען DictaBERT..."},
+        "loading_phonikud": {"en": "Loading Phonikud...", "he": "טוען Phonikud..."},
+        "loading_piper": {"en": "Loading Piper TTS...", "he": "טוען Piper TTS..."},
     }
 }
 # Internationalization (i18n)
 # ============================================================================
 def t(key: str) -> str:
+    """Get translated UI string for current language."""
     lang = st.session_state.get('ui_lang', 'he')
+    return get_label("ui", key, lang)
 def is_rtl() -> bool:
     """Check if current language is RTL"""
 # ============================================================================
 st.set_page_config(
+    page_title="Bilingual NLP + TTS",
     page_icon="🇮🇱",
     layout="wide"
 )