🪲 Add support for more quotes (#5791)

Fixes #5785

We already support multiple types of quotes for strings, but we were missing a few. A Polish translator caused the build to fail by using a variation of double quotes that we do not support. This PR adds support for:

- Polish variation of double quotes: „”
- Swedish/Finnish variation of double quotes: ””
- Korean quotes: 《》
- Japanese quotes: 「」

**How to test**

In level 4, the following print statements should work without an error:

```
print „pol”
print ”swe”
print 《kor》
print 「jap」
```
hedyorg · Sep 20, 2024 · c6e2d43 · c6e2d43
1 parent 5a386a4
commit c6e2d43
Show file tree

Hide file tree

Showing 5 changed files with 50 additions and 67 deletions.
diff --git a/grammars/keywords-zun.lark b/grammars/keywords-zun.lark
diff --git a/grammars/level4-Additions.lark b/grammars/level4-Additions.lark
@@ -21,9 +21,6 @@ list_access: var_access _AT (INT | random | var_access)
 // anything can be parsed except for a newline, a space and a list separator
 textwithoutspaces: /([^\n،,，、 ]+)/ -> text
 
-
-
-
-quoted_text: (/'((?:[^\\']|\\.)*)'/ | /"((?:[^\\"]|\\.)*)"/ | /‘((?:[^\\‘]|\\.)*)’/ | /“((?:[^\\”]|\\.)*)”/ | /«((?:[^\\»]|\\.)*)»/ | /„((?:[^\\“]|\\.)*)“/ ) -> text //text can be between single or double quotes, but quotes may be escaped with \
+quoted_text: (/'((?:[^\\']|\\.)*)'/ | /"((?:[^\\"]|\\.)*)"/ | /‘((?:[^\\‘]|\\.)*)’/ | /“((?:[^\\”]|\\.)*)”/ | /„((?:[^\\“”]|\\.)*)[“”]/ | /”((?:[^\\”]|\\.)*)”/ | /«((?:[^\\»]|\\.)*)»/ | /《((?:[^\\》]|\\.)*)》/ | /「((?:[^\\」]|\\.)*)」/ ) -> text //text can be between single or double quotes, but quotes may be escaped with \
 
 
diff --git a/hedy.py b/hedy.py
@@ -1429,12 +1429,16 @@ def process_characters_needing_escape(value):
 
 
 supported_quotes = {
-    "'": "'",  # single straight quotation marks
-    '"': '"',  # double straight quotation marks
-    '‘': '’',  # single curved quotation marks
-    "“": "”",  # double curved quotation marks or English quotes
-    "„": "“",  # inward double curved quotation marks or German quotes
-    "«": "»",  # guillemets or double angular marks or French quotes
+    "'": ["'"],  # single straight quotation marks
+    '"': ['"'],  # double straight quotation marks
+    '‘': ['’'],  # single curved quotation marks
+    "“": ["”"],  # double curved quotation marks or English quotes
+    "„": ["“",   # inward double curved quotation marks or German quotes
+          "”"],  # rightward double curved quotation marks or Polish quotes
+    '”': ['”'],  # rightward double curved quotation marks or Swedish/Finnish quotes
+    "«": ["»"],  # guillemets or double angular marks or French quotes
+    "《": ["》"],  # Korean quotes
+    "「": ["」"],  # Japanese quotes
 }
 
 
@@ -1452,7 +1456,7 @@ def find_unquoted_segments(s):
             used_quote = c
             result += segment
             segment = c
-        elif used_quote and c == supported_quotes[used_quote]:
+        elif used_quote and c in supported_quotes[used_quote]:
             # if this is a valid closing quote, then empty the buffer as it holds a correctly quoted segment
             used_quote = None
             segment = ''

diff --git a/hedy_grammar.py b/hedy_grammar.py
@@ -2,7 +2,7 @@
 import warnings
 from os import path
 from functools import cache
-from hedy_translation import keywords_to_dict
+import hedy_translation
 
 """
 Because of the gradual nature of Hedy, the grammar of every level is just slightly different than the grammar of the
@@ -271,7 +271,7 @@ def expand_keyword_not_followed_by_space(**kwargs):
 
 def get_translated_keyword(keyword, lang):
     def get_keyword_value_from_lang(keyword_, lang_):
-        keywords = keywords_to_dict(lang_)
+        keywords = hedy_translation.keywords_to_dict(lang_)
         if keyword_ in keywords:
             return [k for k in keywords[keyword_] if k]
         else:

diff --git a/tests/test_level/test_level_04.py b/tests/test_level/test_level_04.py
@@ -78,6 +78,42 @@ def test_print_french_quoted_text(self):
             max_level=11,
             expected=expected)
 
+    def test_print_polish_quoted_text(self):
+        code = "print „bonjour tous le monde!”"
+        expected = "print(f'bonjour tous le monde!')"
+
+        self.multi_level_tester(
+            code=code,
+            max_level=11,
+            expected=expected)
+
+    def test_print_swedish_quoted_text(self):
+        code = "print ”bonjour tous le monde!”"
+        expected = "print(f'bonjour tous le monde!')"
+
+        self.multi_level_tester(
+            code=code,
+            max_level=11,
+            expected=expected)
+
+    def test_print_korean_quoted_text(self):
+        code = "print 《bonjour tous le monde!》"
+        expected = "print(f'bonjour tous le monde!')"
+
+        self.multi_level_tester(
+            code=code,
+            max_level=11,
+            expected=expected)
+
+    def test_print_japanese_quoted_text(self):
+        code = "print 「bonjour tous le monde!」"
+        expected = "print(f'bonjour tous le monde!')"
+
+        self.multi_level_tester(
+            code=code,
+            max_level=11,
+            expected=expected)
+
     def test_print_chinese_quoted_text(self):
         code = "print “逃离鬼屋！”"
         expected = "print(f'逃离鬼屋！')"