diff --git a/README.md b/README.md
new file mode 100644
index 0000000..53dc0be
--- /dev/null
+++ b/README.md
@@ -0,0 +1,63 @@
+# oimdp: OpenITI mARkdown Parser
+
+This Python library parses an [OpenITI mARkdown](https://alraqmiyyat.github.io/mARkdown/) document and returns a Python class
+representation of the document structures.
+
+## Usage
+
+```py
+import oimdp
+
+md_file = open("mARkdownfile", "r")
+text = md_file.read()
+md_file.close()
+parsed = oimdp.parse(text)
+```
+
+## Parsed structure
+
+Please see [the docs](https://openiti.github.io/oimdp/), but here are some highlights:
+
+### Document API
+
+`content`: a list of content structures
+
+`get_clean_text()`: get the text stripped of markup
+
+### Content structures
+
+`Content` classes contain an original value from the document and some extracted content, such as a text string or a specific value.
+
+Most other structures are listed in sequence (e.g. a `Paragraph` is followed by a `Line`).
+
+`Line` objects and other line-level structures are divided into `PhrasePart` objects.
+
+`PhrasePart` objects are phrase-level tags.
+
+## Develop
+
+Set up a virtual environment with `venv`:
+
+```sh
+python3 -m venv .env
+```
+
+Activate the virtual environment:
+
+```sh
+source .env/bin/activate
+```
+
+Install the package:
+
+```sh
+python setup.py install
+```
+
+## Tests
+
+With the environment activated:
+
+```sh
+python tests/test.py
+```
\ No newline at end of file
diff --git a/oimdp/parser.py b/oimdp/parser.py
new file mode 100644
index 0000000..4b0603e
--- /dev/null
+++ b/oimdp/parser.py
@@ -0,0 +1,328 @@
+import sys
+import re
+from .structures import Age, Date, Document, Hemistich, Hukm, Isnad, Matn, NamedEntity, OpenTagAuto, OpenTagUser, PageNumber, Paragraph, Line, RouteDist, RouteFrom, RouteTowa, Verse, Milestone
+from .structures import SectionHeader, Editorial, DictionaryUnit, BioOrEvent
+from .structures import DoxographicalItem, MorphologicalPattern, TextPart
+from .structures import AdministrativeRegion, RouteOrDistance, Riwayat
+from . import tags as t
+
+PAGE_PATTERN = re.compile(r"PageV(\d+)P(\d+)")
+OPEN_TAG_CUSTOM_PATTERN = r"@[^@]+?@[^_@]+?_[^_@]+?(?:_[^_@]+?)?@"
+OPEN_TAG_CUSTOM_PATTERN_GROUPED = re.compile(
+    r"@([^@]+?)@([^_@]+?)_([^_@]+?)(_([^_@]+?))?@"
+)
+OPEN_TAG_AUTO_PATTERN = r"@[A-Z]{3}@[A-Z]{3,}@[A-Za-z]+@(?:-@[0tf][ftalmr]@)?"
+OPEN_TAG_AUTO_PATTERN_GROUPED = re.compile(
+    r"@([A-Z]{3})@([A-Z]{3,})@([A-Za-z]+)@(-@([0tf][ftalmr])@)?"
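+    # Illustrative example: a token like "@RES@TYPE@Category@-@fr@" matches this
+    # pattern, with resp "RES", t_type "TYPE", category "Category" and the
+    # optional review flag "fr" captured by the groups above.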
+)
+YEAR_PATTERN = [rf"{t.YEAR_AGE}\d{{1,4}}", rf"{t.YEAR_DEATH}\d{{1,4}}", rf"{t.YEAR_BIRTH}\d{{1,4}}", rf"{t.YEAR_OTHER}\d{{1,4}}"]
+TOP_PATTERN = [rf"{t.TOP_FULL}\d{{1,2}}", rf"{t.TOP}\d{{1,2}}"]
+PER_PATTERN = [rf"{t.PER_FULL}\d{{1,2}}", rf"{t.PER}\d{{1,2}}"]
+SOC_PATTERN = [rf"{t.SOC_FULL}\d{{1,2}}", rf"{t.SOC}\d{{1,2}}"]
+NAMED_ENTITIES_PATTERN = [*YEAR_PATTERN, *TOP_PATTERN, *PER_PATTERN, rf"{t.SRC}\d{{1,2}}", *SOC_PATTERN]
+
+
+def parse_tags(s: str):
+    return s
+
+
+def remove_phrase_lv_tags(s: str):
+    text_only = s
+    for tag in t.PHRASE_LV_TAGS:
+        text_only = text_only.replace(tag, '')
+    for tag in NAMED_ENTITIES_PATTERN:
+        text_only = re.compile(tag).sub('', text_only)
+    # Open tags
+    text_only = OPEN_TAG_CUSTOM_PATTERN_GROUPED.sub('', text_only)
+    text_only = OPEN_TAG_AUTO_PATTERN_GROUPED.sub('', text_only)
+    text_only = PAGE_PATTERN.sub('', text_only)
+    return text_only
+
+
+def parse_line(tagged_il: str, index: int, obj=Line, first_token=None):
+    """Parse a tagged line into LineParts by splitting it on tags and patterns."""
+    # remove line tag
+    il = tagged_il.replace(t.LINE, '')
+
+    # get clean text
+    text_only = il
+    text_only = remove_phrase_lv_tags(text_only)
+
+    if text_only == "":
+        return None
+
+    line = obj(il, text_only)
+
+    # Split the line by tags. Make sure patterns do not include subgroups!
+    tokens = re.split(rf"(PageV\d+P\d+|{OPEN_TAG_AUTO_PATTERN}|{OPEN_TAG_CUSTOM_PATTERN}|{'|'.join([re.escape(tag) for tag in t.PHRASE_LV_TAGS])}|{'|'.join(NAMED_ENTITIES_PATTERN)})", il)
+
+    # Some structures inject a token at the beginning of a line, like a riwāyaŧ's isnād
+    if first_token:
+        line.add_part(first_token(""))
+
+    # Named entities include in their `text` property a given number of words from the following text token.
+    # This variable is used to keep track. A "word" is just a space-separated token.
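+    # For example (illustrative): a named-entity token whose trailing digits are "12"
+    # is read below as prefix 1 and extent 2, so the entity's text absorbs the next
+    # two space-separated words of the following text token.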
+ include_words = 0 + + for token in tokens: + if token == '': + continue + + opentag_match = None + opentagauto_match = None + + if token.startswith('@'): + opentag_match = OPEN_TAG_CUSTOM_PATTERN_GROUPED.match(token) + opentagauto_match = OPEN_TAG_AUTO_PATTERN_GROUPED.match(token) + + if t.PAGE in token: + m = PAGE_PATTERN.search(token) + try: + line.add_part(PageNumber(token, m.group(1), m.group(2))) + except Exception: + raise Exception( + 'Could not parse page number at line: ' + str(index+1) + ) + elif opentag_match: + line.add_part(OpenTagUser(token, + opentag_match.group(1), # user + opentag_match.group(2), # t_type + opentag_match.group(3), # t_subtype + opentag_match.group(5))) # t_subsubtype + elif opentagauto_match: + line.add_part(OpenTagAuto(token, + opentagauto_match.group(1), # resp + opentagauto_match.group(2), # t_type + opentagauto_match.group(3), # category + opentagauto_match.group(5))) # review + elif t.HEMI in token: + line.add_part(Hemistich(token)) + elif t.MILESTONE in token: + line.add_part(Milestone(token)) + elif t.MATN in token: + line.add_part(Matn(token)) + elif t.HUKM in token: + line.add_part(Hukm(token)) + elif t.ROUTE_FROM in token: + line.add_part(RouteFrom(token)) + elif t.ROUTE_TOWA in token: + line.add_part(RouteTowa(token)) + elif t.ROUTE_DIST in token: + line.add_part(RouteDist(token)) + elif t.YEAR_BIRTH in token: + line.add_part(Date(token, token.replace(t.YEAR_BIRTH, ''), 'birth')) + elif t.YEAR_DEATH in token: + line.add_part(Date(token, token.replace(t.YEAR_DEATH, ''), 'death')) + elif t.YEAR_OTHER in token: + line.add_part(Date(token, token.replace(t.YEAR_OTHER, ''), 'other')) + elif t.YEAR_AGE in token: + line.add_part(Age(token, token.replace(t.YEAR_AGE, ''))) + elif t.SRC in token: + val = token.replace(t.SRC, '') + include_words = int(val[1]) + line.add_part(NamedEntity(token, int(val[0]), include_words, "", 'src')) + elif t.SOC_FULL in token: + val = token.replace(t.SOC_FULL, '') + include_words = int(val[1]) + line.add_part(NamedEntity(token, int(val[0]), include_words, "", 'soc')) + elif t.SOC in token in token: + val = token.replace(t.SOC, '') + include_words = int(val[1]) + line.add_part(NamedEntity(token, int(val[0]), include_words, "", 'soc')) + elif t.TOP_FULL in token: + val = token.replace(t.TOP_FULL, '') + include_words = int(val[1]) + line.add_part(NamedEntity(token, int(val[0]), include_words, "", 'top')) + elif t.TOP in token: + val = token.replace(t.TOP, '') + include_words = int(val[1]) + line.add_part(NamedEntity(token, int(val[0]), include_words, "", 'top')) + elif t.PER_FULL in token: + val = token.replace(t.PER_FULL, '') + include_words = int(val[1]) + line.add_part(NamedEntity(token, int(val[0]), include_words, "", 'per')) + elif t.PER in token: + val = token.replace(t.PER, '') + include_words = int(val[1]) + line.add_part(NamedEntity(token, int(val[0]), include_words, "", 'per')) + else: + if include_words > 0: + rest = "" + words = token.strip().split() + for pos, word in enumerate(reversed(words)): # reversing split for r-t-l script + if (pos < include_words): + line.parts[-1].text = line.parts[-1].text + word + " " + else: + rest = rest + word + " " + if len(rest): + line.add_part(TextPart(rest)) + include_words = 0 + else: + line.add_part(TextPart(token)) + return line + + +def parser(text: str): + """Parses an OpenITI mARkdown file and returns a Document object""" + document = Document(text) + + # Split input text into lines + ilines = text.splitlines() + + # Magic value + magic_value = ilines[0] + + if 
magic_value.strip() != "######OpenITI#": + raise Exception( + "This does not appear to be an OpenITI mARkdown document") + sys.exit(1) + + document.set_magic_value(magic_value) + + # RE patterns + para_pattern = re.compile(r"^#($|[^#])") + bio_pattern = re.compile(rf"{re.escape(t.BIO_MAN)}[^#]") + morpho_pattern = re.compile(r"#~:([^:]+?):") + region_pattern = re.compile( + rf"({t.PROV}|{t.REG}\d) .*? {t.GEO_TYPE} .*? ({t.REG}\d|{t.STTL}) ([\w# ]+) $" + ) + + # Input lines loop + for i, il in enumerate(ilines): + + # N.B. the order of if statements matters! + # We're doing string matching and tag elements are re-used. + + # Non-machine readable metadata + if (il.startswith(t.META)): + if (il.strip() == t.METAEND): + continue + value = il.split(t.META, 1)[1].strip() + document.set_simple_metadata_field(il, value) + + # Content-level page numbers + elif (il.startswith(t.PAGE)): + pv = PAGE_PATTERN.search(il) + try: + document.add_content(PageNumber(il, pv.group(1), pv.group(2))) + except Exception: + raise Exception( + 'Could not parse page number at line: ' + str(i+1) + ) + + # Riwāyāt units + elif (il.startswith(t.RWY)): + # Set first line, skipping para marker "# $RWY$" + document.add_content(Riwayat()) + first_line = parse_line(il[7:], i, first_token=Isnad) + if first_line: + document.add_content(first_line) + + # Routes + elif (il.startswith(t.ROUTE_FROM)): + document.add_content(parse_line(il, i, RouteOrDistance)) + + # Morphological pattern + elif (morpho_pattern.search(il)): + m = morpho_pattern.search(il) + document.add_content(MorphologicalPattern(il, m.group(1))) + + # Paragraphs and lines of verse + elif (para_pattern.search(il)): + if (t.HEMI in il): + # this is a verse line, skip para marker "#" + document.add_content(parse_line(il[1:], i, Verse)) + else: + document.add_content(Paragraph()) + first_line = parse_line(il[1:], i) + if first_line: + document.add_content(first_line) + + # Lines + elif (il.startswith(t.LINE)): + document.add_content(parse_line(il, i)) + + # Editorial section + elif (il.startswith(t.EDITORIAL)): + document.add_content(Editorial(il)) + + # Section headers + elif (il.startswith(t.HEADER1)): + value = il + for tag in t.HEADERS: + value = value.replace(tag, '') + # remove other phrase level tags + value = remove_phrase_lv_tags(value) + # TODO: capture tags as PhraseParts + level = 1 + if (t.HEADER5 in il): + level = 5 + elif (t.HEADER4 in il): + level = 4 + elif (t.HEADER3 in il): + level = 3 + elif (t.HEADER2 in il): + level = 2 + + document.add_content(SectionHeader(il, value, level)) + + # Dictionary entry + elif (il.startswith(t.DIC)): + no_tag = il + for tag in t.DICTIONARIES: + no_tag = no_tag.replace(tag, '') + first_line = parse_line(no_tag, i) + dic_type = "bib" + if (t.DIC_LEX in il): + dic_type = "lex" + elif (t.DIC_NIS in il): + dic_type = "nis" + elif (t.DIC_TOP in il): + dic_type = "top" + document.add_content(DictionaryUnit(il, dic_type)) + if first_line: + document.add_content(first_line) + + # Doxographical item + elif (il.startswith(t.DOX)): + no_tag = il + for tag in t.DOXOGRAPHICAL: + no_tag = no_tag.replace(tag, '') + first_line = parse_line(no_tag, i) + dox_type = "pos" + if (t.DOX_SEC in il): + dox_type = "sec" + document.add_content(DoxographicalItem(il, dox_type)) + if first_line: + document.add_content(first_line) + + # Biographies and Events + elif (bio_pattern.search(il) or il.startswith(t.BIO) or il.startswith(t.EVENT)): + no_tag = il + for tag in t.BIOS_EVENTS: + no_tag = no_tag.replace(tag, '') + first_line = 
parse_line(no_tag, i) + be_type = "man" + # Ordered from longer to shorter string to aid matching. I.e. ### $$$ before ### $$ + if (t.LIST_NAMES_FULL in il or t.LIST_NAMES in il): + be_type = "names" + elif (t.BIO_REF_FULL in il or t.BIO_REF in il): + be_type = "ref" + elif (t.BIO_WOM_FULL in il or t.BIO_WOM in il): + be_type = "wom" + elif (t.LIST_EVENTS in il): + be_type = "events" + elif (t.EVENT in il): + be_type = "event" + document.add_content(BioOrEvent(il, be_type)) + if first_line: + document.add_content(first_line) + + # Regions + elif (region_pattern.search(il)): + document.add_content(AdministrativeRegion(il)) + + else: + continue + + return document diff --git a/oimdp/structures.py b/oimdp/structures.py new file mode 100644 index 0000000..46cb679 --- /dev/null +++ b/oimdp/structures.py @@ -0,0 +1,287 @@ +from typing import List, Literal + + +class MagicValue: + """Magic Value of OpenITI mARkdown file""" + def __init__(self, orig: str): + self.orig = orig + self.value = "######OpenITI#" + + def __str__(self): + return self.value + + +class SimpleMetadataField: + """A non-machine readable metadata field""" + def __init__(self, orig: str, value: str): + self.orig = orig + self.value = value + + def __str__(self): + return self.value + + +class LinePart: + """A line-level tag""" + def __init__(self, orig: str): + self.orig = orig + + def __str__(self): + return self.orig + + +class TextPart(LinePart): + """Phrase-level text""" + def __init__(self, orig: str): + self.orig = orig + self.text = orig + + def __str__(self): + return self.text + + +class Date(LinePart): + """A date in running text""" + def __init__(self, orig: str, value: str, date_type: str): + self.orig = orig + self.value = value + self.date_type: Literal["birth", "death", "age", "other"] = date_type + + def __str__(self): + return self.orig + +class Age(LinePart): + """A number indicating age in running text""" + def __init__(self, orig: str, value: str): + self.orig = orig + self.value = value + + def __str__(self): + return self.orig + +class NamedEntity(LinePart): + """A named entity""" + def __init__(self, orig: str, prefix: int, extent: int, text: str, ne_type: str): + self.orig = orig + self.text = text + self.prefix = prefix + self.extent = extent + self.ne_type: Literal["top", "per", "soc", "src"] = ne_type + + def __str__(self): + return self.text + + +class OpenTagUser(LinePart): + """A custom tag added by a specific user""" + def __init__(self, orig: str, user: str, t_type: str, t_subtype: str, t_subsubtype: str): + self.orig = orig + self.user = user + self.t_type = t_type + self.t_subtype = t_subtype + self.t_subsubtype = t_subsubtype + + def __str__(self): + return self.value + + +class OpenTagAuto(LinePart): + """A custom tag added automatically""" + def __init__(self, orig: str, resp: str, t_type: str, category: str, review: str): + self.orig = orig + self.resp = resp + self.t_type = t_type + self.category = category + self.review = review + + def __str__(self): + return self.value + + +class Milestone(LinePart): + """Milestone typically used for splitting text in 300-word blocks""" + def __str__(self): + return "" + + +class Isnad(LinePart): + """An isnād part of a riwāyaŧ unit""" + + +class Matn(LinePart): + """A matn part of a riwāyaŧ unit""" + + +class Hukm(LinePart): + """A ḥukm part of a riwāyaŧ unit""" + + +class Line: + """A line of text that may contain parts""" + def __init__(self, orig: str, text_only: str, parts: List[LinePart] = None): + self.orig = orig + self.text_only = text_only + if 
(parts is None): + self.parts = [] + else: + self.parts = parts + + def add_part(self, part: LinePart): + self.parts.append(part) + + def __str__(self): + return "".join([str(p) for p in self.parts]) + + +class PageNumber(): + """A page and volume number. Can be Content or LinePart object""" + def __init__(self, orig: str, vol: str, page: str): + self.orig = orig + self.page = page + self.volume = vol + + def __str__(self): + return f"Vol. {self.volume}, p. {self.page}" + + +class Content: + """A content structure""" + def __init__(self, orig: str): + self.orig = orig + + def __str__(self): + return self.orig + + +class Verse(Line): + """A line of poetry""" + +class Hemistich(LinePart): + """Tags the beginning of a hemistic in a verse""" + + +class Paragraph(Content): + """Marks the beginning of a paragraph""" + def __init__(self, orig = "#"): + self.orig = orig + + def __str__(self): + return "" + +class SectionHeader(Content): + """A section header""" + def __init__(self, orig: str, value: str, level: int): + self.orig = orig + self.value = value + self.level = level + + def __str__(self): + return self.value + + +class Editorial(Content): + """Marks the beginning of an editorial section""" + def __init__(self, orig: str): + self.orig = orig + + def __str__(self): + return "" + + +class DictionaryUnit(Content): + """Marks a dictionary unit""" + def __init__(self, orig: str, dic_type: str): + self.orig = orig + self.dic_type: Literal["nit", "top", "lex", "bib"] = dic_type + + def __str__(self): + return "" + + +class BioOrEvent(Content): + """Marks a biography or an event""" + def __init__(self, orig: str, be_type: str): + self.orig = orig + self.be_type: Literal["man", "wom", "ref", "names", "event", "events"] = be_type + + def __str__(self): + return "" + + +class DoxographicalItem(Content): + """Marks a doxographical section""" + def __init__(self, orig: str, dox_type: str): + self.orig = orig + self.dox_type: Literal["pos", "sec"] = dox_type + + def __str__(self): + return self.value + + +class MorphologicalPattern(Content): + """A milestone to tag passages that can be categorized thematically.""" + def __init__(self, orig: str, category: str): + self.orig = orig + self.category = category + + def __str__(self): + return "" + + +class AdministrativeRegion(Content): + """An administrative region""" + # TODO + + def __str__(self): + return "" + + +class RouteOrDistance(Line): + """A route or distance""" + + +class RouteFrom(LinePart): + """Origin of a Route""" + + +class RouteTowa(LinePart): + """Destination of a Route""" + + +class RouteDist(LinePart): + """Distance of a Route""" + + +class Riwayat(Paragraph): + """Riwāyāt unit""" + + +class Document: + """The OpenITI mARkdown document""" + def __init__(self, text): + self.orig_text = text + self.simple_metadata = [] + self.content = [] + + def set_magic_value(self, orig: str): + self.magic_value = MagicValue(orig) + + def set_simple_metadata_field(self, orig: str, value: str): + self.simple_metadata.append(SimpleMetadataField(orig, value)) + + def add_content(self, content: Content): + self.content.append(content) + + def get_clean_text(self, includeMetadata: bool = False): + text = "" + if (includeMetadata): + text += "Metadata:\n" + text += "\n".join([str(md) for md in self.simple_metadata]) + text += "\n\n" + + text += "\n".join([str(c) for c in self.content]) + + return text + + def __str__(self): + return self.orig_text diff --git a/tests/batch.py b/tests/batch.py new file mode 100644 index 0000000..1298482 --- /dev/null +++ 
b/tests/batch.py @@ -0,0 +1,33 @@ +import sys +import os +import urllib.request +# import traceback +sys.path.append( + os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))) +import oimdp + + +if __name__ == "__main__": + + response = urllib.request.urlopen( + "https://raw.githubusercontent.com/OpenITI/RELEASE/master/OpenITI_metatdata_2019_1_1" + ) + release = response.read() + release = release.decode('utf-8') + + for line in release.split("\n"): + url = line.split('\t')[7] + if (url.endswith('mARkdown') or url.endswith('completed')): + # get file from GitHub + print("Parsing " + url) + try: + response = urllib.request.urlopen(url) + data = response.read() + text = data.decode('utf-8') + try: + oimdp.parse(text) + except Exception as identifier: + # print(traceback.format_exc()) + print("\tERR: ", identifier) + except Exception as identifier: + print("\t", identifier) diff --git a/tests/test.py b/tests/test.py new file mode 100644 index 0000000..ccf89bb --- /dev/null +++ b/tests/test.py @@ -0,0 +1,263 @@ +import sys +import os +sys.path.append( + os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))) +import unittest +import oimdp +from oimdp.structures import Age, BioOrEvent, Date, DictionaryUnit, Document, DoxographicalItem, Editorial, Hemistich, Hukm, Isnad, Line, Matn, Milestone, MorphologicalPattern, NamedEntity, OpenTagAuto, OpenTagUser, PageNumber, Paragraph, Riwayat, RouteDist, RouteFrom, RouteOrDistance, RouteTowa, SectionHeader, TextPart, Verse + + +class TestStringMethods(unittest.TestCase): + + def __init__(self, *args, **kwargs): + super(TestStringMethods, self).__init__(*args, **kwargs) + root = os.path.dirname(__file__) + filepath = os.path.join( + root, "test.md" + ) + test_file = open(filepath, "r") + self.text = test_file.read() + test_file.close() + self.parsed = oimdp.parse(self.text) + + def generic_check(self, datatype, location: int, type: str, property: str = ""): + content = self.parsed.content[location] + self.assertTrue(isinstance(content, datatype)) + if (len(property) > 0): + self.assertEqual(getattr(content, property), type) + + def test_magic(self): + self.assertEqual(str(self.parsed.magic_value), "######OpenITI#") + + def test_meta(self): + self.assertEqual(str(self.parsed.simple_metadata[1]), + "000.SortField :: Shamela_0023833") + self.assertEqual(str(self.parsed.simple_metadata[-1]), + "999.MiscINFO :: NODATA") + + def test_document(self): + self.assertTrue(isinstance(self.parsed, Document)) + + def test_page(self): + self.assertTrue(isinstance(self.parsed.content[1].parts[1], + PageNumber)) + self.assertEqual(str(self.parsed.content[1].parts[1]), + "Vol. 00, p. 
000") + + def test_bio_or_event(self): + def check(location: int, type: str): + self.generic_check(BioOrEvent, location, type, "be_type") + + check(2, "man") + self.assertEqual(str(self.parsed.content[3].parts[0]), + " أبو عمرو ابن العلاء واسمه") + check(8, "man") + self.assertEqual(str(self.parsed.content[9].parts[0]), + " أبو عمرو ابن العلاء واسمه") + check(14, "wom") + self.assertEqual(str(self.parsed.content[15].parts[0]), + " 1729 - صمعة بنت أحمد بن محمد بن عبيد الله الرئيس النيسابورية من ولد عثمان بن") + check(17, "wom") + self.assertEqual(str(self.parsed.content[18].parts[0]), + " 1729 - صمعة بنت أحمد بن محمد بن عبيد الله الرئيس النيسابورية من ولد عثمان بن") + check(20, "ref") + self.assertEqual(str(self.parsed.content[21].parts[0]), + " [a cross-reference, for both men and women]") + check(23, "ref") + self.assertEqual(str(self.parsed.content[24].parts[0]), + " [a cross-reference, for both men and women]") + check(26, "names") + self.assertEqual(str(self.parsed.content[27].parts[0]), + " -وفيها ولد: (@)(@@) المحدث عفيف ") + check(29, "names") + self.assertEqual(str(self.parsed.content[30].parts[0]), + " -وفيها ولد: (@)(@@) المحدث عفيف ") + check(32, "events") + check(34, "event") + check(49, "man") + + def test_dictionary_unit(self): + def check(location: int, type: str): + self.generic_check(DictionaryUnit, location, type, "dic_type") + + check(36, "nis") + check(38, "top") + check(40, "lex") + check(42, "bib") + + def test_doxographical(self): + def check(location: int, type: str): + self.generic_check(DoxographicalItem, location, type, "dox_type") + + check(44, "pos") + check(46, "sec") + + def test_editorial(self): + self.assertTrue(isinstance(self.parsed.content[48], Editorial)) + + def test_morphological(self): + self.assertTrue(isinstance(self.parsed.content[50], MorphologicalPattern)) + self.assertTrue(self.parsed.content[50].category, "onomastic") + + def test_paragraph(self): + self.assertTrue(isinstance(self.parsed.content[51], Paragraph)) + + def test_line(self): + self.assertTrue(isinstance(self.parsed.content[52], Line)) + self.assertTrue(isinstance(self.parsed.content[53], Line)) + ## Check line parts on 53 + + def test_milestone(self): + self.assertTrue(isinstance(self.parsed.content[67], Line)) + self.assertTrue(isinstance(self.parsed.content[67].parts[1], Milestone)) + + def test_named_entities(self): + self.assertTrue(isinstance(self.parsed.content[53].parts[1], Date)) + self.assertEqual(self.parsed.content[53].parts[1].date_type, "death") + + self.assertTrue(isinstance(self.parsed.content[68].parts[1], Date)) + self.assertEqual(self.parsed.content[68].parts[1].date_type, "birth") + self.assertEqual(self.parsed.content[68].parts[1].value, "597") + + self.assertTrue(isinstance(self.parsed.content[69].parts[1], Date)) + self.assertEqual(self.parsed.content[69].parts[1].date_type, "other") + self.assertEqual(self.parsed.content[69].parts[1].value, "597") + + self.assertTrue(isinstance(self.parsed.content[70].parts[1], Age)) + self.assertEqual(self.parsed.content[70].parts[1].value, "059") + + self.assertTrue(isinstance(self.parsed.content[71].parts[1], NamedEntity)) + self.assertEqual(self.parsed.content[71].parts[1].ne_type, "soc") + self.assertEqual(self.parsed.content[71].parts[1].prefix, 0) + self.assertEqual(self.parsed.content[71].parts[1].extent, 2) + self.assertEqual(self.parsed.content[71].parts[1].text, 'معمر شيخ: ') + self.assertEqual(self.parsed.content[71].parts[2].text, 'واسط.. 
1"018: نزيل: ') + + self.assertTrue(isinstance(self.parsed.content[72].parts[1], NamedEntity)) + self.assertEqual(self.parsed.content[72].parts[1].ne_type, "soc") + self.assertEqual(self.parsed.content[72].parts[1].prefix, 1) + self.assertEqual(self.parsed.content[72].parts[1].extent, 3) + + self.assertTrue(isinstance(self.parsed.content[73].parts[1], NamedEntity)) + self.assertEqual(self.parsed.content[73].parts[1].ne_type, "top") + self.assertEqual(self.parsed.content[73].parts[1].prefix, 0) + self.assertEqual(self.parsed.content[73].parts[1].extent, 2) + + self.assertTrue(isinstance(self.parsed.content[74].parts[1], NamedEntity)) + self.assertEqual(self.parsed.content[74].parts[1].ne_type, "top") + self.assertEqual(self.parsed.content[74].parts[1].prefix, 1) + self.assertEqual(self.parsed.content[74].parts[1].extent, 3) + + self.assertTrue(isinstance(self.parsed.content[75].parts[1], NamedEntity)) + self.assertEqual(self.parsed.content[75].parts[1].ne_type, "per") + self.assertEqual(self.parsed.content[75].parts[1].prefix, 0) + self.assertEqual(self.parsed.content[75].parts[1].extent, 2) + + self.assertTrue(isinstance(self.parsed.content[76].parts[1], NamedEntity)) + self.assertEqual(self.parsed.content[76].parts[1].ne_type, "per") + self.assertEqual(self.parsed.content[76].parts[1].prefix, 1) + self.assertEqual(self.parsed.content[76].parts[1].extent, 3) + + self.assertTrue(isinstance(self.parsed.content[77].parts[1], NamedEntity)) + self.assertEqual(self.parsed.content[77].parts[1].ne_type, "src") + self.assertEqual(self.parsed.content[77].parts[1].prefix, 0) + self.assertEqual(self.parsed.content[77].parts[1].extent, 3) + + def test_opentags(self): + self.assertTrue(isinstance(self.parsed.content[79].parts[1], OpenTagUser)) + self.assertEqual(self.parsed.content[79].parts[1].user, "USER") + self.assertEqual(self.parsed.content[79].parts[1].t_type, "CAT") + self.assertEqual(self.parsed.content[79].parts[1].t_subtype, "SUBCAT") + self.assertEqual(self.parsed.content[79].parts[1].t_subsubtype, "SUBSUBCAT") + + def test_opentagsauto(self): + self.assertTrue(isinstance(self.parsed.content[81].parts[1], OpenTagAuto)) + self.assertEqual(self.parsed.content[81].parts[1].resp, "RES") + self.assertEqual(self.parsed.content[81].parts[1].t_type, "TYPE") + self.assertEqual(self.parsed.content[81].parts[1].category, "Category") + self.assertEqual(self.parsed.content[81].parts[1].review, "fr") + + def test_riwayat(self): + self.assertTrue(isinstance(self.parsed.content[54], Riwayat)) + self.assertTrue(isinstance(self.parsed.content[55], Line)) + self.assertTrue(isinstance(self.parsed.content[55].parts[0], Isnad)) + self.assertTrue(isinstance(self.parsed.content[55].parts[1], TextPart)) + self.assertEqual(self.parsed.content[55].parts[1].orig, " this section contains isnād ") + + self.assertTrue(isinstance(self.parsed.content[55].parts[2], Matn)) + self.assertTrue(isinstance(self.parsed.content[55].parts[3], TextPart)) + self.assertEqual(self.parsed.content[55].parts[3].orig, " this section") + + self.assertTrue(isinstance(self.parsed.content[56], Line)) + self.assertTrue(isinstance(self.parsed.content[56].parts[0], TextPart)) + self.assertEqual(self.parsed.content[56].parts[0].orig, " contains matn ") + + self.assertTrue(isinstance(self.parsed.content[56].parts[1], Hukm)) + self.assertTrue(isinstance(self.parsed.content[56].parts[2], TextPart)) + self.assertEqual(self.parsed.content[56].parts[2].orig, " this section contains ḥukm .") + + def test_route_or_distance(self): + 
self.assertTrue(isinstance(self.parsed.content[57], RouteOrDistance)) + self.assertTrue(isinstance(self.parsed.content[57].parts[0], RouteFrom)) + self.assertTrue(isinstance(self.parsed.content[57].parts[1], TextPart)) + self.assertEqual(self.parsed.content[57].parts[1].orig, " toponym ") + + self.assertTrue(isinstance(self.parsed.content[57].parts[2], RouteTowa)) + self.assertTrue(isinstance(self.parsed.content[57].parts[3], TextPart)) + self.assertEqual(self.parsed.content[57].parts[3].orig, " toponym ") + + self.assertTrue(isinstance(self.parsed.content[57].parts[4], RouteDist)) + self.assertTrue(isinstance(self.parsed.content[57].parts[5], TextPart)) + self.assertEqual(self.parsed.content[57].parts[5].orig, " distance_as_recorded") + + def test_section_headers(self): + self.assertTrue(isinstance(self.parsed.content[58], SectionHeader)) + self.assertEqual(self.parsed.content[58].value, " ذكر سرد النسب الزكي من محمد صلى الله عليه وآله وسلم، إلى آدم عليه السلام") + self.assertEqual(self.parsed.content[58].level, 1) + + self.assertTrue(isinstance(self.parsed.content[59], SectionHeader)) + self.assertEqual(self.parsed.content[59].value, " (نهج ابن هشام في هذا الكتاب) :") + self.assertEqual(self.parsed.content[59].level, 2) + + self.assertTrue(isinstance(self.parsed.content[60], SectionHeader)) + self.assertEqual(self.parsed.content[60].value, " (نهج ابن هشام في هذا الكتاب) :") + self.assertEqual(self.parsed.content[60].level, 3) + + self.assertTrue(isinstance(self.parsed.content[61], SectionHeader)) + self.assertEqual(self.parsed.content[61].value, " (نهج ابن هشام في هذا الكتاب) :") + self.assertEqual(self.parsed.content[61].level, 4) + + self.assertTrue(isinstance(self.parsed.content[62], SectionHeader)) + self.assertEqual(self.parsed.content[62].value, " (نهج ابن هشام في هذا الكتاب) :") + self.assertEqual(self.parsed.content[62].level, 5) + + def test_verse(self): + self.assertTrue(isinstance(self.parsed.content[63], Verse)) + self.assertTrue(isinstance(self.parsed.content[63].parts[0], TextPart)) + self.assertEqual(self.parsed.content[63].parts[0].orig, " وجمع العرب تحت لواء الرسول محمد عليه الصلاة ") + + self.assertTrue(isinstance(self.parsed.content[63].parts[1], Hemistich)) + self.assertEqual(self.parsed.content[63].parts[1].orig, "%~%") + + self.assertTrue(isinstance(self.parsed.content[63].parts[2], TextPart)) + self.assertEqual(self.parsed.content[63].parts[2].orig, " والسلام، وما يضاف إلى ذلك من") + + self.assertTrue(isinstance(self.parsed.content[64], Verse)) + self.assertTrue(isinstance(self.parsed.content[64].parts[0], TextPart)) + self.assertEqual(self.parsed.content[64].parts[0].orig, " ") + self.assertTrue(isinstance(self.parsed.content[64].parts[1], Hemistich)) + self.assertEqual(self.parsed.content[64].parts[1].orig, "%~%") + self.assertTrue(isinstance(self.parsed.content[64].parts[2], TextPart)) + self.assertEqual(self.parsed.content[64].parts[2].orig, " وجمع العرب تحت لواء الرسول محمد عليه الصلاة والسلام، وما يضاف إلى ذلك من") + + self.assertTrue(isinstance(self.parsed.content[65], Verse)) + self.assertTrue(isinstance(self.parsed.content[65].parts[1], Hemistich)) + self.assertEqual(self.parsed.content[65].parts[1].orig, "%~%") + self.assertTrue(isinstance(self.parsed.content[65].parts[0], TextPart)) + self.assertEqual(self.parsed.content[65].parts[0].orig, " جمع العرب تحت لواء الرسول محمد عليه الصلاة والسلام، وما يضاف إلى ذلك من") + + # TODO: ADMINISTRATIVE REGIONS! + + +if __name__ == "__main__": + unittest.main()