From 386fdd928b3cd01b62d155f55b461d8d2cff8b40 Mon Sep 17 00:00:00 2001
From: AmPhIbIaN26 <43638430+AmPhIbIaN26@users.noreply.github.com>
Date: Sat, 10 Apr 2021 23:06:47 +0530
Subject: [PATCH 01/11] Implemented parsing Roman Numerals

I have implemented parsing for Roman numerals. To parse a Roman numeral that appears inside a sentence, you have to specify the language explicitly (language='rom').
---
 number_parser/data/rom.py | 47 +++++++++++++++++++++++++++++++++++++++
 number_parser/parser.py   | 13 ++++++++++-
 2 files changed, 59 insertions(+), 1 deletion(-)
 create mode 100644 number_parser/data/rom.py

diff --git a/number_parser/data/rom.py b/number_parser/data/rom.py
new file mode 100644
index 0000000..c05939f
--- /dev/null
+++ b/number_parser/data/rom.py
@@ -0,0 +1,47 @@
+info = {
+    "UNIT_NUMBERS": {
+        "i": 1,
+        "ii": 2,
+        "iii": 3,
+        "iv": 5,
+        "vi": 6,
+        "vii": 7,
+        "viii": 8,
+        "ix": 9
+    },
+    "DIRECT_NUMBERS": {
+        "x": 10,
+
+    },
+    "TENS": {
+        "xx": 20,
+        "xxx": 30,
+        "xl": 40,
+        "l": 50,
+        "lx": 60,
+        "lxx": 70,
+        "lxxx": 80,
+        "xc": 90
+    },
+    "HUNDREDS": {
+        "c": 100,
+        "cc": 200,
+        "ccc": 300,
+        "cd": 400,
+        "d": 500,
+        "dc": 600,
+        "dcc": 700,
+        "dccc": 800,
+        "cm": 900
+    },
+    "BIG_POWERS_OF_TEN": {
+        "m": 1000,
+        "mm": 2000,
+        "mmm": 3000
+    },
+    "SKIP_TOKENS": [
+        "-",
+        "and"
+    ],
+    "USE_LONG_SCALE": False
+}
diff --git a/number_parser/parser.py b/number_parser/parser.py
index e0d67c2..59ce810 100644
--- a/number_parser/parser.py
+++ b/number_parser/parser.py
@@ -1,8 +1,9 @@
 import re
 from importlib import import_module
 import unicodedata
+
 SENTENCE_SEPARATORS = [".", ","]
-SUPPORTED_LANGUAGES = ['en', 'es', 'hi', 'ru']
+SUPPORTED_LANGUAGES = ['en', 'es', 'hi', 'ru', 'rom']
 RE_BUG_LANGUAGES = ['hi']
 
 
@@ -141,6 +142,8 @@ def _build_number(token_list, lang_data):
 
 def _tokenize(input_string, language):
     """Breaks string on any non-word character."""
+    if language == 'rom':
+        return re.split("^(m{0,3})(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})$", input_string.lower())
     input_string = input_string.replace('\xad', '')
     if language in RE_BUG_LANGUAGES:
         return re.split(r'(\s+)', input_string)
@@ -310,6 +313,14 @@ def parse(input_string, language=None):
 
     tokens = _tokenize(input_string, language)
 
+    if language == 'rom':
+        tokens = _tokenize(input_string, language=None)
+        for token in tokens:
+            if re.search("^(m{0,3})(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})$", token.lower()):
+                tokens[tokens.index(token)] = str(parse_number(token, language='rom'))
+        final_sentance = ''.join(tokens)
+        return final_sentance
+
     final_sentence = []
     current_sentence = []
     tokens_taken = []

From 81ed627b9644ff18a6271f41458d1f024b384cd0 Mon Sep 17 00:00:00 2001
From: AmPhIbIaN26 <43638430+AmPhIbIaN26@users.noreply.github.com>
Date: Thu, 15 Apr 2021 02:07:12 +0530
Subject: [PATCH 02/11] Revert "Implemented parsing Roman Numerals"

This reverts commit 386fdd928b3cd01b62d155f55b461d8d2cff8b40.
---
 number_parser/data/rom.py | 47 ---------------------------------------
 number_parser/parser.py   | 13 +----------
 2 files changed, 1 insertion(+), 59 deletions(-)
 delete mode 100644 number_parser/data/rom.py

diff --git a/number_parser/data/rom.py b/number_parser/data/rom.py
deleted file mode 100644
index c05939f..0000000
--- a/number_parser/data/rom.py
+++ /dev/null
@@ -1,47 +0,0 @@
-info = {
-    "UNIT_NUMBERS": {
-        "i": 1,
-        "ii": 2,
-        "iii": 3,
-        "iv": 5,
-        "vi": 6,
-        "vii": 7,
-        "viii": 8,
-        "ix": 9
-    },
-    "DIRECT_NUMBERS": {
-        "x": 10,
-
-    },
-    "TENS": {
-        "xx": 20,
-        "xxx": 30,
-        "xl": 40,
-        "l": 50,
-        "lx": 60,
-        "lxx": 70,
-        "lxxx": 80,
-        "xc": 90
-    },
-    "HUNDREDS": {
-        "c": 100,
-        "cc": 200,
-        "ccc": 300,
-        "cd": 400,
-        "d": 500,
-        "dc": 600,
-        "dcc": 700,
-        "dccc": 800,
-        "cm": 900
-    },
-    "BIG_POWERS_OF_TEN": {
-        "m": 1000,
-        "mm": 2000,
-        "mmm": 3000
-    },
-    "SKIP_TOKENS": [
-        "-",
-        "and"
-    ],
-    "USE_LONG_SCALE": False
-}
diff --git a/number_parser/parser.py b/number_parser/parser.py
index 59ce810..e0d67c2 100644
--- a/number_parser/parser.py
+++ b/number_parser/parser.py
@@ -1,9 +1,8 @@
 import re
 from importlib import import_module
 import unicodedata
-
 SENTENCE_SEPARATORS = [".", ","]
-SUPPORTED_LANGUAGES = ['en', 'es', 'hi', 'ru', 'rom']
+SUPPORTED_LANGUAGES = ['en', 'es', 'hi', 'ru']
 RE_BUG_LANGUAGES = ['hi']
 
 
@@ -142,8 +141,6 @@ def _build_number(token_list, lang_data):
 
 def _tokenize(input_string, language):
     """Breaks string on any non-word character."""
-    if language == 'rom':
-        return re.split("^(m{0,3})(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})$", input_string.lower())
     input_string = input_string.replace('\xad', '')
     if language in RE_BUG_LANGUAGES:
         return re.split(r'(\s+)', input_string)
@@ -313,14 +310,6 @@ def parse(input_string, language=None):
 
     tokens = _tokenize(input_string, language)
 
-    if language == 'rom':
-        tokens = _tokenize(input_string, language=None)
-        for token in tokens:
-            if re.search("^(m{0,3})(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})$", token.lower()):
-                tokens[tokens.index(token)] = str(parse_number(token, language='rom'))
-        final_sentance = ''.join(tokens)
-        return final_sentance
-
     final_sentence = []
     current_sentence = []
     tokens_taken = []

From 6658bc40e28c81771fe84f463e4a71ecaac632f7 Mon Sep 17 00:00:00 2001
From: AmPhIbIaN26 <43638430+AmPhIbIaN26@users.noreply.github.com>
Date: Thu, 15 Apr 2021 02:28:07 +0530
Subject: [PATCH 03/11] Implemented parsing Roman Numerals

I have implemented parsing for Roman numerals. To parse a Roman numeral that appears inside a sentence, you have to specify the language explicitly (language='rom').
---
 number_parser/data/rom.py | 44 +++++++++++++++++++++++++++++++++++++++
 number_parser/parser.py   | 12 ++++++++++-
 2 files changed, 55 insertions(+), 1 deletion(-)
 create mode 100644 number_parser/data/rom.py

diff --git a/number_parser/data/rom.py b/number_parser/data/rom.py
new file mode 100644
index 0000000..a8bb44a
--- /dev/null
+++ b/number_parser/data/rom.py
@@ -0,0 +1,44 @@
+info = {
+    "UNIT_NUMBERS": {
+        "i": 1,
+        "ii": 2,
+        "iii": 3,
+        "iv": 5,
+        "vi": 6,
+        "vii": 7,
+        "viii": 8,
+        "ix": 9
+    },
+    "DIRECT_NUMBERS": {
+        "x": 10,
+
+    },
+    "TENS": {
+        "xx": 20,
+        "xxx": 30,
+        "xl": 40,
+        "l": 50,
+        "lx": 60,
+        "lxx": 70,
+        "lxxx": 80,
+        "xc": 90
+    },
+    "HUNDREDS": {
+        "c": 100,
+        "cc": 200,
+        "ccc": 300,
+        "cd": 400,
+        "d": 500,
+        "dc": 600,
+        "dcc": 700,
+        "dccc": 800,
+        "cm": 900
+    },
+    "BIG_POWERS_OF_TEN": {
+        "m": 1000,
+        "mm": 2000,
+        "mmm": 3000
+    },
+    "SKIP_TOKENS": [],
+    "USE_LONG_SCALE": False
+}
\ No newline at end of file
diff --git a/number_parser/parser.py b/number_parser/parser.py
index e0d67c2..f552f4e 100644
--- a/number_parser/parser.py
+++ b/number_parser/parser.py
@@ -2,7 +2,7 @@
 from importlib import import_module
 import unicodedata
 SENTENCE_SEPARATORS = [".", ","]
-SUPPORTED_LANGUAGES = ['en', 'es', 'hi', 'ru']
+SUPPORTED_LANGUAGES = ['en', 'es', 'hi', 'ru', 'rom']
 RE_BUG_LANGUAGES = ['hi']
 
 
@@ -141,6 +141,8 @@ def _build_number(token_list, lang_data):
 
 def _tokenize(input_string, language):
     """Breaks string on any non-word character."""
+    if language == 'rom':
+        return re.split("^(m{0,3})(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})$", input_string.lower())
     input_string = input_string.replace('\xad', '')
     if language in RE_BUG_LANGUAGES:
         return re.split(r'(\s+)', input_string)
@@ -310,6 +312,14 @@ def parse(input_string, language=None):
 
     tokens = _tokenize(input_string, language)
 
+    if language == 'rom':
+        tokens = _tokenize(input_string, language=None)
+        for token in tokens:
+            if re.search("^(m{0,3})(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})$", token.lower()):
+                tokens[tokens.index(token)] = str(parse_number(token, language='rom'))
+        final_sentance = ''.join(tokens)
+        return final_sentance
+
     final_sentence = []
     current_sentence = []
     tokens_taken = []

From ff64b93ab6767df981224a2a971748a9da9604f8 Mon Sep 17 00:00:00 2001
From: AmPhIbIaN26 <43638430+AmPhIbIaN26@users.noreply.github.com>
Date: Mon, 3 May 2021 23:47:07 +0530
Subject: [PATCH 04/11] Revert "Implemented parsing Roman Numerals"

This reverts commit 6658bc40e28c81771fe84f463e4a71ecaac632f7.
---
 number_parser/data/rom.py | 44 ---------------------------------------
 number_parser/parser.py   | 12 +----------
 2 files changed, 1 insertion(+), 55 deletions(-)
 delete mode 100644 number_parser/data/rom.py

diff --git a/number_parser/data/rom.py b/number_parser/data/rom.py
deleted file mode 100644
index a8bb44a..0000000
--- a/number_parser/data/rom.py
+++ /dev/null
@@ -1,44 +0,0 @@
-info = {
-    "UNIT_NUMBERS": {
-        "i": 1,
-        "ii": 2,
-        "iii": 3,
-        "iv": 5,
-        "vi": 6,
-        "vii": 7,
-        "viii": 8,
-        "ix": 9
-    },
-    "DIRECT_NUMBERS": {
-        "x": 10,
-
-    },
-    "TENS": {
-        "xx": 20,
-        "xxx": 30,
-        "xl": 40,
-        "l": 50,
-        "lx": 60,
-        "lxx": 70,
-        "lxxx": 80,
-        "xc": 90
-    },
-    "HUNDREDS": {
-        "c": 100,
-        "cc": 200,
-        "ccc": 300,
-        "cd": 400,
-        "d": 500,
-        "dc": 600,
-        "dcc": 700,
-        "dccc": 800,
-        "cm": 900
-    },
-    "BIG_POWERS_OF_TEN": {
-        "m": 1000,
-        "mm": 2000,
-        "mmm": 3000
-    },
-    "SKIP_TOKENS": [],
-    "USE_LONG_SCALE": False
-}
\ No newline at end of file
diff --git a/number_parser/parser.py b/number_parser/parser.py
index f552f4e..e0d67c2 100644
--- a/number_parser/parser.py
+++ b/number_parser/parser.py
@@ -2,7 +2,7 @@
 from importlib import import_module
 import unicodedata
 SENTENCE_SEPARATORS = [".", ","]
-SUPPORTED_LANGUAGES = ['en', 'es', 'hi', 'ru', 'rom']
+SUPPORTED_LANGUAGES = ['en', 'es', 'hi', 'ru']
 RE_BUG_LANGUAGES = ['hi']
 
 
@@ -141,8 +141,6 @@ def _build_number(token_list, lang_data):
 
 def _tokenize(input_string, language):
     """Breaks string on any non-word character."""
-    if language == 'rom':
-        return re.split("^(m{0,3})(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})$", input_string.lower())
     input_string = input_string.replace('\xad', '')
     if language in RE_BUG_LANGUAGES:
         return re.split(r'(\s+)', input_string)
@@ -312,14 +310,6 @@ def parse(input_string, language=None):
 
     tokens = _tokenize(input_string, language)
 
-    if language == 'rom':
-        tokens = _tokenize(input_string, language=None)
-        for token in tokens:
-            if re.search("^(m{0,3})(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})$", token.lower()):
-                tokens[tokens.index(token)] = str(parse_number(token, language='rom'))
-        final_sentance = ''.join(tokens)
-        return final_sentance
-
     final_sentence = []
     current_sentence = []
     tokens_taken = []

From c6285cb1ba6e75544a517aa62f2ae64010994d8f Mon Sep 17 00:00:00 2001
From: AmPhIbIaN26 <43638430+AmPhIbIaN26@users.noreply.github.com>
Date: Mon, 3 May 2021 23:47:18 +0530
Subject: [PATCH 05/11] Revert "Revert "Implemented parsing Roman Numerals""

This reverts commit 81ed627b9644ff18a6271f41458d1f024b384cd0.
---
 number_parser/data/rom.py | 47 +++++++++++++++++++++++++++++++++++++++
 number_parser/parser.py   | 13 ++++++++++-
 2 files changed, 59 insertions(+), 1 deletion(-)
 create mode 100644 number_parser/data/rom.py

diff --git a/number_parser/data/rom.py b/number_parser/data/rom.py
new file mode 100644
index 0000000..c05939f
--- /dev/null
+++ b/number_parser/data/rom.py
@@ -0,0 +1,47 @@
+info = {
+    "UNIT_NUMBERS": {
+        "i": 1,
+        "ii": 2,
+        "iii": 3,
+        "iv": 5,
+        "vi": 6,
+        "vii": 7,
+        "viii": 8,
+        "ix": 9
+    },
+    "DIRECT_NUMBERS": {
+        "x": 10,
+
+    },
+    "TENS": {
+        "xx": 20,
+        "xxx": 30,
+        "xl": 40,
+        "l": 50,
+        "lx": 60,
+        "lxx": 70,
+        "lxxx": 80,
+        "xc": 90
+    },
+    "HUNDREDS": {
+        "c": 100,
+        "cc": 200,
+        "ccc": 300,
+        "cd": 400,
+        "d": 500,
+        "dc": 600,
+        "dcc": 700,
+        "dccc": 800,
+        "cm": 900
+    },
+    "BIG_POWERS_OF_TEN": {
+        "m": 1000,
+        "mm": 2000,
+        "mmm": 3000
+    },
+    "SKIP_TOKENS": [
+        "-",
+        "and"
+    ],
+    "USE_LONG_SCALE": False
+}
diff --git a/number_parser/parser.py b/number_parser/parser.py
index e0d67c2..59ce810 100644
--- a/number_parser/parser.py
+++ b/number_parser/parser.py
@@ -1,8 +1,9 @@
 import re
 from importlib import import_module
 import unicodedata
+
 SENTENCE_SEPARATORS = [".", ","]
-SUPPORTED_LANGUAGES = ['en', 'es', 'hi', 'ru']
+SUPPORTED_LANGUAGES = ['en', 'es', 'hi', 'ru', 'rom']
 RE_BUG_LANGUAGES = ['hi']
 
 
@@ -141,6 +142,8 @@ def _build_number(token_list, lang_data):
 
 def _tokenize(input_string, language):
     """Breaks string on any non-word character."""
+    if language == 'rom':
+        return re.split("^(m{0,3})(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})$", input_string.lower())
     input_string = input_string.replace('\xad', '')
     if language in RE_BUG_LANGUAGES:
         return re.split(r'(\s+)', input_string)
@@ -310,6 +313,14 @@ def parse(input_string, language=None):
 
     tokens = _tokenize(input_string, language)
 
+    if language == 'rom':
+        tokens = _tokenize(input_string, language=None)
+        for token in tokens:
+            if re.search("^(m{0,3})(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})$", token.lower()):
+                tokens[tokens.index(token)] = str(parse_number(token, language='rom'))
+        final_sentance = ''.join(tokens)
+        return final_sentance
+
     final_sentence = []
     current_sentence = []
     tokens_taken = []

From ce8136562ed08767d12355bd6675eb094ab8027b Mon Sep 17 00:00:00 2001
From: AmPhIbIaN26 <43638430+AmPhIbIaN26@users.noreply.github.com>
Date: Mon, 3 May 2021 23:47:23 +0530
Subject: [PATCH 06/11] Revert "Implemented parsing Roman Numerals"

This reverts commit 386fdd928b3cd01b62d155f55b461d8d2cff8b40.
---
 number_parser/data/rom.py | 47 ---------------------------------------
 number_parser/parser.py   | 13 +----------
 2 files changed, 1 insertion(+), 59 deletions(-)
 delete mode 100644 number_parser/data/rom.py

diff --git a/number_parser/data/rom.py b/number_parser/data/rom.py
deleted file mode 100644
index c05939f..0000000
--- a/number_parser/data/rom.py
+++ /dev/null
@@ -1,47 +0,0 @@
-info = {
-    "UNIT_NUMBERS": {
-        "i": 1,
-        "ii": 2,
-        "iii": 3,
-        "iv": 5,
-        "vi": 6,
-        "vii": 7,
-        "viii": 8,
-        "ix": 9
-    },
-    "DIRECT_NUMBERS": {
-        "x": 10,
-
-    },
-    "TENS": {
-        "xx": 20,
-        "xxx": 30,
-        "xl": 40,
-        "l": 50,
-        "lx": 60,
-        "lxx": 70,
-        "lxxx": 80,
-        "xc": 90
-    },
-    "HUNDREDS": {
-        "c": 100,
-        "cc": 200,
-        "ccc": 300,
-        "cd": 400,
-        "d": 500,
-        "dc": 600,
-        "dcc": 700,
-        "dccc": 800,
-        "cm": 900
-    },
-    "BIG_POWERS_OF_TEN": {
-        "m": 1000,
-        "mm": 2000,
-        "mmm": 3000
-    },
-    "SKIP_TOKENS": [
-        "-",
-        "and"
-    ],
-    "USE_LONG_SCALE": False
-}
diff --git a/number_parser/parser.py b/number_parser/parser.py
index 59ce810..e0d67c2 100644
--- a/number_parser/parser.py
+++ b/number_parser/parser.py
@@ -1,9 +1,8 @@
 import re
 from importlib import import_module
 import unicodedata
-
 SENTENCE_SEPARATORS = [".", ","]
-SUPPORTED_LANGUAGES = ['en', 'es', 'hi', 'ru', 'rom']
+SUPPORTED_LANGUAGES = ['en', 'es', 'hi', 'ru']
 RE_BUG_LANGUAGES = ['hi']
 
 
@@ -142,8 +141,6 @@ def _build_number(token_list, lang_data):
 
 def _tokenize(input_string, language):
     """Breaks string on any non-word character."""
-    if language == 'rom':
-        return re.split("^(m{0,3})(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})$", input_string.lower())
     input_string = input_string.replace('\xad', '')
     if language in RE_BUG_LANGUAGES:
         return re.split(r'(\s+)', input_string)
@@ -313,14 +310,6 @@ def parse(input_string, language=None):
 
     tokens = _tokenize(input_string, language)
 
-    if language == 'rom':
-        tokens = _tokenize(input_string, language=None)
-        for token in tokens:
-            if re.search("^(m{0,3})(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})$", token.lower()):
-                tokens[tokens.index(token)] = str(parse_number(token, language='rom'))
-        final_sentance = ''.join(tokens)
-        return final_sentance
-
     final_sentence = []
     current_sentence = []
     tokens_taken = []

From b36483e3b5fb33ed583ab35c7e7740ac6b93aa25 Mon Sep 17 00:00:00 2001
From: AmPhIbIaN26 <43638430+AmPhIbIaN26@users.noreply.github.com>
Date: Mon, 3 May 2021 23:54:25 +0530
Subject: [PATCH 07/11] Update __init__.py

Added encoding='utf8' to the open() call that reads the test CSV files in tests/__init__.py.
---
 tests/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/__init__.py b/tests/__init__.py
index 31eac93..f634674 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -20,7 +20,7 @@ def get_test_files(path, prefix):
 def _test_files(path, language, is_ordinal=True):
     fnx = parse_ordinal if is_ordinal else parse_number
     for filename in get_test_files(path, f'{language}_'):
-        with open(filename, "r") as csv_file:
+        with open(filename, "r", encoding='utf8') as csv_file:
             csv_reader = csv.DictReader(csv_file)
             for row in csv_reader:
                 try:

From be5d4e32bc631560cd79f7bcdd9ff71803680a10 Mon Sep 17 00:00:00 2001
From: AmPhIbIaN26 <43638430+AmPhIbIaN26@users.noreply.github.com>
Date: Mon, 3 May 2021 23:57:20 +0530
Subject: [PATCH 08/11] Revert "Update __init__.py"

This reverts commit b36483e3b5fb33ed583ab35c7e7740ac6b93aa25.
---
 tests/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/__init__.py b/tests/__init__.py
index f634674..31eac93 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -20,7 +20,7 @@ def get_test_files(path, prefix):
 def _test_files(path, language, is_ordinal=True):
     fnx = parse_ordinal if is_ordinal else parse_number
     for filename in get_test_files(path, f'{language}_'):
-        with open(filename, "r", encoding='utf8') as csv_file:
+        with open(filename, "r") as csv_file:
             csv_reader = csv.DictReader(csv_file)
             for row in csv_reader:
                 try:

From 8e2834318bc38f719013f9bbfc7d9eff4059d3c6 Mon Sep 17 00:00:00 2001
From: AmPhIbIaN26 <43638430+AmPhIbIaN26@users.noreply.github.com>
Date: Mon, 3 May 2021 23:57:22 +0530
Subject: [PATCH 09/11] Revert "Revert "Implemented parsing Roman Numerals""

This reverts commit ce8136562ed08767d12355bd6675eb094ab8027b.
---
 number_parser/data/rom.py | 47 +++++++++++++++++++++++++++++++++++++++
 number_parser/parser.py   | 13 ++++++++++-
 2 files changed, 59 insertions(+), 1 deletion(-)
 create mode 100644 number_parser/data/rom.py

diff --git a/number_parser/data/rom.py b/number_parser/data/rom.py
new file mode 100644
index 0000000..c05939f
--- /dev/null
+++ b/number_parser/data/rom.py
@@ -0,0 +1,47 @@
+info = {
+    "UNIT_NUMBERS": {
+        "i": 1,
+        "ii": 2,
+        "iii": 3,
+        "iv": 5,
+        "vi": 6,
+        "vii": 7,
+        "viii": 8,
+        "ix": 9
+    },
+    "DIRECT_NUMBERS": {
+        "x": 10,
+
+    },
+    "TENS": {
+        "xx": 20,
+        "xxx": 30,
+        "xl": 40,
+        "l": 50,
+        "lx": 60,
+        "lxx": 70,
+        "lxxx": 80,
+        "xc": 90
+    },
+    "HUNDREDS": {
+        "c": 100,
+        "cc": 200,
+        "ccc": 300,
+        "cd": 400,
+        "d": 500,
+        "dc": 600,
+        "dcc": 700,
+        "dccc": 800,
+        "cm": 900
+    },
+    "BIG_POWERS_OF_TEN": {
+        "m": 1000,
+        "mm": 2000,
+        "mmm": 3000
+    },
+    "SKIP_TOKENS": [
+        "-",
+        "and"
+    ],
+    "USE_LONG_SCALE": False
+}
diff --git a/number_parser/parser.py b/number_parser/parser.py
index e0d67c2..59ce810 100644
--- a/number_parser/parser.py
+++ b/number_parser/parser.py
@@ -1,8 +1,9 @@
 import re
 from importlib import import_module
 import unicodedata
+
 SENTENCE_SEPARATORS = [".", ","]
-SUPPORTED_LANGUAGES = ['en', 'es', 'hi', 'ru']
+SUPPORTED_LANGUAGES = ['en', 'es', 'hi', 'ru', 'rom']
 RE_BUG_LANGUAGES = ['hi']
 
 
@@ -141,6 +142,8 @@ def _build_number(token_list, lang_data):
 
 def _tokenize(input_string, language):
     """Breaks string on any non-word character."""
+    if language == 'rom':
+        return re.split("^(m{0,3})(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})$", input_string.lower())
     input_string = input_string.replace('\xad', '')
     if language in RE_BUG_LANGUAGES:
         return re.split(r'(\s+)', input_string)
@@ -310,6 +313,14 @@ def parse(input_string, language=None):
 
     tokens = _tokenize(input_string, language)
 
+    if language == 'rom':
+        tokens = _tokenize(input_string, language=None)
+        for token in tokens:
+            if re.search("^(m{0,3})(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})$", token.lower()):
+                tokens[tokens.index(token)] = str(parse_number(token, language='rom'))
+        final_sentance = ''.join(tokens)
+        return final_sentance
+
     final_sentence = []
     current_sentence = []
     tokens_taken = []

From 9a6fb48ed577607212cf49046c764c93291d2f96 Mon Sep 17 00:00:00 2001
From: AmPhIbIaN26 <43638430+AmPhIbIaN26@users.noreply.github.com>
Date: Mon, 3 May 2021 23:57:26 +0530
Subject: [PATCH 10/11] Revert "Revert "Revert "Implemented parsing Roman
 Numerals"""

This reverts commit c6285cb1ba6e75544a517aa62f2ae64010994d8f.
---
 number_parser/data/rom.py | 47 ---------------------------------------
 number_parser/parser.py   | 13 +----------
 2 files changed, 1 insertion(+), 59 deletions(-)
 delete mode 100644 number_parser/data/rom.py

diff --git a/number_parser/data/rom.py b/number_parser/data/rom.py
deleted file mode 100644
index c05939f..0000000
--- a/number_parser/data/rom.py
+++ /dev/null
@@ -1,47 +0,0 @@
-info = {
-    "UNIT_NUMBERS": {
-        "i": 1,
-        "ii": 2,
-        "iii": 3,
-        "iv": 5,
-        "vi": 6,
-        "vii": 7,
-        "viii": 8,
-        "ix": 9
-    },
-    "DIRECT_NUMBERS": {
-        "x": 10,
-
-    },
-    "TENS": {
-        "xx": 20,
-        "xxx": 30,
-        "xl": 40,
-        "l": 50,
-        "lx": 60,
-        "lxx": 70,
-        "lxxx": 80,
-        "xc": 90
-    },
-    "HUNDREDS": {
-        "c": 100,
-        "cc": 200,
-        "ccc": 300,
-        "cd": 400,
-        "d": 500,
-        "dc": 600,
-        "dcc": 700,
-        "dccc": 800,
-        "cm": 900
-    },
-    "BIG_POWERS_OF_TEN": {
-        "m": 1000,
-        "mm": 2000,
-        "mmm": 3000
-    },
-    "SKIP_TOKENS": [
-        "-",
-        "and"
-    ],
-    "USE_LONG_SCALE": False
-}
diff --git a/number_parser/parser.py b/number_parser/parser.py
index 59ce810..e0d67c2 100644
--- a/number_parser/parser.py
+++ b/number_parser/parser.py
@@ -1,9 +1,8 @@
 import re
 from importlib import import_module
 import unicodedata
-
 SENTENCE_SEPARATORS = [".", ","]
-SUPPORTED_LANGUAGES = ['en', 'es', 'hi', 'ru', 'rom']
+SUPPORTED_LANGUAGES = ['en', 'es', 'hi', 'ru']
 RE_BUG_LANGUAGES = ['hi']
 
 
@@ -142,8 +141,6 @@ def _build_number(token_list, lang_data):
 
 def _tokenize(input_string, language):
     """Breaks string on any non-word character."""
-    if language == 'rom':
-        return re.split("^(m{0,3})(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})$", input_string.lower())
     input_string = input_string.replace('\xad', '')
     if language in RE_BUG_LANGUAGES:
         return re.split(r'(\s+)', input_string)
@@ -313,14 +310,6 @@ def parse(input_string, language=None):
 
     tokens = _tokenize(input_string, language)
 
-    if language == 'rom':
-        tokens = _tokenize(input_string, language=None)
-        for token in tokens:
-            if re.search("^(m{0,3})(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})$", token.lower()):
-                tokens[tokens.index(token)] = str(parse_number(token, language='rom'))
-        final_sentance = ''.join(tokens)
-        return final_sentance
-
     final_sentence = []
     current_sentence = []
     tokens_taken = []

From 4c127ecd1c5fcb3f118f3ae62fde2e18b987eed4 Mon Sep 17 00:00:00 2001
From: AmPhIbIaN26 <43638430+AmPhIbIaN26@users.noreply.github.com>
Date: Mon, 3 May 2021 23:57:29 +0530
Subject: [PATCH 11/11] Revert "Revert "Implemented parsing Roman Numerals""

This reverts commit ff64b93ab6767df981224a2a971748a9da9604f8.
---
 number_parser/data/rom.py | 44 +++++++++++++++++++++++++++++++++++++++
 number_parser/parser.py   | 12 ++++++++++-
 2 files changed, 55 insertions(+), 1 deletion(-)
 create mode 100644 number_parser/data/rom.py

diff --git a/number_parser/data/rom.py b/number_parser/data/rom.py
new file mode 100644
index 0000000..a8bb44a
--- /dev/null
+++ b/number_parser/data/rom.py
@@ -0,0 +1,44 @@
+info = {
+    "UNIT_NUMBERS": {
+        "i": 1,
+        "ii": 2,
+        "iii": 3,
+        "iv": 5,
+        "vi": 6,
+        "vii": 7,
+        "viii": 8,
+        "ix": 9
+    },
+    "DIRECT_NUMBERS": {
+        "x": 10,
+
+    },
+    "TENS": {
+        "xx": 20,
+        "xxx": 30,
+        "xl": 40,
+        "l": 50,
+        "lx": 60,
+        "lxx": 70,
+        "lxxx": 80,
+        "xc": 90
+    },
+    "HUNDREDS": {
+        "c": 100,
+        "cc": 200,
+        "ccc": 300,
+        "cd": 400,
+        "d": 500,
+        "dc": 600,
+        "dcc": 700,
+        "dccc": 800,
+        "cm": 900
+    },
+    "BIG_POWERS_OF_TEN": {
+        "m": 1000,
+        "mm": 2000,
+        "mmm": 3000
+    },
+    "SKIP_TOKENS": [],
+    "USE_LONG_SCALE": False
+}
\ No newline at end of file
diff --git a/number_parser/parser.py b/number_parser/parser.py
index e0d67c2..f552f4e 100644
--- a/number_parser/parser.py
+++ b/number_parser/parser.py
@@ -2,7 +2,7 @@
 from importlib import import_module
 import unicodedata
 SENTENCE_SEPARATORS = [".", ","]
-SUPPORTED_LANGUAGES = ['en', 'es', 'hi', 'ru']
+SUPPORTED_LANGUAGES = ['en', 'es', 'hi', 'ru', 'rom']
 RE_BUG_LANGUAGES = ['hi']
 
 
@@ -141,6 +141,8 @@ def _build_number(token_list, lang_data):
 
 def _tokenize(input_string, language):
     """Breaks string on any non-word character."""
+    if language == 'rom':
+        return re.split("^(m{0,3})(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})$", input_string.lower())
     input_string = input_string.replace('\xad', '')
     if language in RE_BUG_LANGUAGES:
         return re.split(r'(\s+)', input_string)
@@ -310,6 +312,14 @@ def parse(input_string, language=None):
 
     tokens = _tokenize(input_string, language)
 
+    if language == 'rom':
+        tokens = _tokenize(input_string, language=None)
+        for token in tokens:
+            if re.search("^(m{0,3})(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})$", token.lower()):
+                tokens[tokens.index(token)] = str(parse_number(token, language='rom'))
+        final_sentance = ''.join(tokens)
+        return final_sentance
+
     final_sentence = []
     current_sentence = []
     tokens_taken = []