TASS18-Task3
diff --git a/.gitignore b/.gitignore
Lines changed: 105 additions & 0 deletions
diff --git a/Readme.md b/Readme.md
Lines changed: 8 additions & 2 deletions
diff --git a/evaluate.py b/evaluate.py
Lines changed: 9 additions & 253 deletions
@@ -0,0 +1,105 @@
+
+# Created by https://www.gitignore.io/api/python
+
+### Python ###
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+.pytest_cache/
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+
+# Translations
+*.mo
+*.pot
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule.*
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+
+
+# End of https://www.gitignore.io/api/python
@@ -11,7 +11,7 @@ The files and folders are organized as follows:
 
 ## Development evaluation
 
-The file `evaluate.py` performs an automatic evaluation of your output files against the gold files. You can use this script to validate your technique(s). The metrics reported are exactly the same ones that will be used in the final evaluation. This script simply evaluates each pair of gold/dev files separately and outputs detailed information of all the mistakes.
+The file `evaluate.py` performs an automatic evaluation of your output files against the gold files. You can use this script to validate your technique(s). The metrics reported are exactly the same ones that will be used in the final evaluation. This script simply evaluates each pair of gold/dev files separately and outputs detailed information about all the mistakes. Its output corresponds to the `Development evaluation...` sections of each of the subtasks.
 
 To run it simply use:
 
@@ -27,7 +27,13 @@ python3 evaluate.py example/gold example/dev
 
 ## Final evaluation
 
-The file `score.py` performs the final evaluation exactly as described in the competition rules, i.e., according to the three evaluation scenarios presented. It assumes the gold files are in `gold` and the files to be submitted are in the `submit` folder, according to the folder structure presented there.
+The file `score.py` performs the final evaluation exactly as described in the competition rules, i.e., according to the three evaluation scenarios presented. It assumes the gold files are in `gold` and the files to be submitted are in the `submit` folder, according to the folder structure presented there. This script's output is the one actually used on CodaLab to rank competitors.
+
+```bash
+python3 score.py gold/ submit/
+```
+
+This script writes a file `score.txt` to the `submit` folder, containing the calculated metrics described in the `Overall evaluation...` section of the competition rules.
 
 ## Notes
 
 
@@ -5,260 +5,16 @@
 import sys
 import pprint
 import collections
-from os.path import abspath, join, exists
-
-def read_input(path):
-    with open(path, encoding='utf8') as fp:
-        return fp.read()
-
-
-def read_phrases(path):
-    phrases = {}
-
-    with open(path) as fp:
-        for line in fp:
-            idx, start, end = line.split()
-            phrases[int(idx)] = dict(
-                start=int(start),
-                end=int(end),
-            )
-
-    return phrases
-
-
-def read_labels(path):
-    labels = {}
-
-    with open(path) as fp:
-        for line in fp:
-            idx, label = line.split()
-            labels[int(idx)] = label
-
-    return labels
-
-
-def read_links(path):
-    links = []
-
-    with open(path) as fp:
-        for line in fp:
-            line = line.split()
-            rel = line[0]
-
-            try:
-                links.append(dict(
-                    rel=rel,
-                    arg1=int(line[1]),
-                    arg2=int(line[2]),
-                    arg3=None,
-                ))
-            except:
-                pass
-
-    return links
-
-
-def find_obj(objs, x):
-    if isinstance(objs, dict):
-        source = objs.items()
-    else:
-        source = zip(objs, objs)
-
-    for idx, l in source:
-        for field in l:
-            if l[field] != x[field]:
-                break
-        else:
-            return idx
+from tools import (get_span,
+                   read_input,
+                   read_phrases,
+                   read_links,
+                   read_labels,
+                   compare_phrases,
+                   compare_links,
+                   compare_labels)
 
-    return None
-
-
-def between(x, a, b):
-    return x >= a and x <= b
-
-
-def intersect(x1, y1, x2, y2):
-    if x1 >= y1:
-        return False
-    if x2 >= y2:
-        return False
-
-    return between(x1, x2, y2) or between(y1, x2, y2) or between(x2, x1, y1) or between(y2, x1, y1)
-
-
-def find_partial(objs, x):
-    fidx = find_obj(objs, x)
-
-    if fidx:
-        return fidx, True
-
-    start = x['start']
-    end = x['end']
-
-    for idx, l in objs.items():
-        sstart = l['start']
-        send = l['end']
-
-        if intersect(start, end, sstart, send):
-            return idx, False
-
-    return None, False
-
-
-def sort(items):
-    return sorted(items, key=lambda x: (x['start'], x['end']))
-
-
-def compare_phrases(gold_phrases, dev_phrases):
-    correct = []
-    missing = []
-    spurious = []
-    partial = []
-    mapping = {}
-
-    for idx, l in gold_phrases.items():
-        fidx, exact = find_partial(dev_phrases, l)
-
-        if fidx and not "eval:%i"%fidx in mapping:
-            if exact:
-                correct.append(l)
-            else:
-                partial.append((l, dev_phrases[fidx]))
-
-            mapping["ref:%i"%idx] = fidx
-            mapping["eval:%i"%fidx] = idx
-        else:
-            missing.append(l)
-
-    for fidx, l in dev_phrases.items():
-        if not "eval:%i"%fidx in mapping:
-            spurious.append(l)
-
-    return dict(
-        correct=sort(correct),
-        missing=sort(missing),
-        spurious=sort(spurious),
-        partial=partial,
-        mapping=mapping,
-    )
-
-
-def compare_labels(gold, dev, mapping):
-    confussion_matrix = collections.defaultdict(lambda: 0)
-
-    correct = []
-    incorrect = []
-    spurious = []
-    missing = []
-
-    for idx, l in gold.items():
-        fidx = mapping.get('ref:%i' % idx)
-
-        if not fidx:
-            missing.append(dict(id=idx, label=l))
-            continue
-
-        if not fidx in dev:
-            missing.append(dict(id=idx, label=l))
-            confussion_matrix[(l, 'None')] += 1
-
-        l2 = dev[fidx]
-        confussion_matrix[(l, l2)] += 1
-
-        if l == l2:
-            correct.append(dict(fidx=fidx, label=l2))
-        else:
-            incorrect.append(dict(fidx=fidx, label=l2, correct=l))
-
-    for fidx, l in dev.items():
-        if "eval:%i"%fidx in mapping:
-            continue
-
-        spurious.append(dict(fidx=fidx, label=l))
-
-    return dict(
-        confussion_matrix=confussion_matrix,
-        correct=correct,
-        incorrect=incorrect,
-        missing=missing,
-        spurious=spurious,
-    )
-
-
-def map_entities(x, mapping, map_key):
-    result = dict(
-        rel=x['rel'],
-        arg1 = None,
-        arg2 = None,
-        arg3 = None,
-    )
-
-    for key in ["arg1", "arg2", "arg3"]:
-        value = x[key]
-
-        if value is None:
-            result[key] = None
-            continue
-
-        mapped = map_key+":%i"%value
-
-        if not mapped in mapping:
-            return False
-
-        result[key] = mapping[mapped]
-
-    return result
-
-
-def find_relation(rel, relations):
-    for r in relations:
-        for k in ["rel", "arg1", "arg2", "arg3"]:
-            if r[k] != rel[k]:
-                break
-        else:
-            return True
-
-    return False
-
-
-def compare_links(gold_links, dev_links, mapping):
-    correct = []
-    missing = []
-    spurious = []
-
-    for rel in gold_links:
-        mapped = map_entities(rel, mapping, "ref")
-
-        if not mapped:
-            missing.append(rel)
-            continue
-
-        if not find_relation(mapped, dev_links):
-            missing.append(rel)
-            continue
-
-        correct.append(rel)
-
-    for rel in dev_links:
-        mapped = map_entities(rel, mapping, "eval")
-
-        if not mapped:
-            spurious.append(rel)
-            continue
-
-        if not find_relation(mapped, gold_links):
-            spurious.append(rel)
-
-    return dict(
-        correct=correct,
-        missing=missing,
-        spurious=spurious,
-    )
-
-
-def get_span(sentences, obj):
-    return sentences[obj["start"]:obj["end"]]
+from os.path import abspath, join, exists
 
 
 def evaluate_phrases(input_file, gold_phrases_file, dev_phrases_file):