Initialize repo for public release

mauna-ai · Feb 25, 2020 · 4e02fe4 · 4e02fe4
1 parent bc43363
commit 4e02fe4
Show file tree

Hide file tree

Showing 47 changed files with 43,973 additions and 2 deletions.
diff --git a/README.md b/README.md
@@ -1,2 +1,80 @@
-# comet-public
-A Public repository for the COMeT model
+To run a generation experiment (either ConceptNet or ATOMIC), follow these instructions:
+
+
+<h1>Installing Dependencies</h1>
+
+First, clone the repo:
+
+```
+git clone https://github.com/atcbosselut/comet.git
+cd comet
+```
+
+Then run the setup scripts to acquire the pretrained model files from OpenAI, as well as the ATOMIC and ConceptNet datasets:
+
+```
+bash scripts/setup/get_atomic_data.sh
+bash scripts/setup/get_conceptnet_data.sh
+bash scripts/setup/get_model_files.sh
+```
+
+Then install dependencies (assuming you already have Python 3.6 and PyTorch >= 1.0):
+
+```
+pip install torch==1.0
+pip install tensorflow
+pip install ftfy==5.1
+conda install -c conda-forge spacy
+python -m spacy download en
+pip install tensorboardX
+pip install tqdm
+pip install pandas
+pip install ipython
+```
+
+<h1> Installing the Package </h1>
+
+Run the following commands:
+
+```
+git checkout package
+pip install .
+```
+
+You should now be able to use most COMeT functionality!
+
+<h1> Launching a demo </h1>
+
+First, download the pretrained models from the following link:
+
+```
+https://drive.google.com/open?id=17TYbeEGgKslFzmfe-TRFKBWiH5F0CSm1
+```
+
+Then untar the file:
+
+```
+tar -xvzf pretrained_models.tar.gz
+```
+
+Then to launch the demo, do the following:
+
+```
+from comet.interactive.atomic_demo import DemoModel
+
+demo_model = DemoModel("/path/to/pretrained_model")
+
+demo_model.predict("PersonX goes to the mall", "xEffect", "beam-10")
+
+```
+
+Or, for ConceptNet:
+
+```
+from comet.interactive.conceptnet_demo import DemoModel
+
+demo_model = DemoModel("/path/to/pretrained_model")
+
+demo_model.predict("man with axe", "CapableOf", "beam-10")
+
+```
diff --git a/__init__.py b/__init__.py
diff --git a/config/atomic/changes.json b/config/atomic/changes.json
@@ -0,0 +1,16 @@
+{
+    "base": {
+        "0": {
+            "gpu_index": 0
+        },
+        "1": {
+            "gpu_index": 1
+        },
+        "2": {
+            "gpu_index": 2
+        },
+        "3": {
+            "gpu_index": 3
+        }
+    }
+}
diff --git a/config/atomic/default.json b/config/atomic/default.json
@@ -0,0 +1,15 @@
+{
+
+    "dataset": "atomic",
+    "categories": ["oReact", "oEffect", "oWant", "xAttr", "xEffect", "xIntent", "xNeed", "xReact", "xWant"],
+    "eval_categories": ["oReact", "oEffect", "oWant", "xAttr", "xEffect", "xIntent", "xNeed", "xReact", "xWant"],
+    "exp": "generation",
+    "labels": "individual",
+    "encoder_path": "model/encoder_bpe_40000.json",
+    "bpe_path": "model/vocab_40000.bpe",
+    "batch_size": 64,
+    "learning_rate_schedule": "warmup_linear",
+    "learning_rate_warmup": 0.002,
+    "l2": 0.01,
+    "vector_l2": "T"
+}
diff --git a/config/atomic/eval_changes.json b/config/atomic/eval_changes.json
@@ -0,0 +1,25 @@
+{
+
+    "base": {
+        "0": {
+            "gpu_index": 0,
+            "generate_sequences": "full",
+            "evaluate_sequences": "full"
+        },
+        "1": {
+            "gpu_index": 1,
+            "generate_sequences": "full",
+            "evaluate_sequences": "full"
+        },
+        "2": {
+            "gpu_index": 2,
+            "generate_sequences": "full",
+            "evaluate_sequences": "full"
+        },
+        "3": {
+            "gpu_index": 3,
+            "generate_sequences": "full",
+            "evaluate_sequences": "full"
+        }
+    }
+}
diff --git a/config/conceptnet/changes.json b/config/conceptnet/changes.json
@@ -0,0 +1,16 @@
+{
+    "base": {
+        "0": {
+            "gpu_index": 0
+        },
+        "1": {
+            "gpu_index": 1
+        },
+        "2": {
+            "gpu_index": 2
+        },
+        "3": {
+            "gpu_index": 3
+        }
+    }
+}
diff --git a/config/conceptnet/default.json b/config/conceptnet/default.json
@@ -0,0 +1,23 @@
+{
+
+    "dataset": "conceptnet",
+    "exp": "generation",
+    "do_gen": "T",
+    "encoder_path": "model/encoder_bpe_40000.json",
+    "bpe_path": "model/vocab_40000.bpe",
+    "batch_size": 64,
+    "learning_rate_schedule": "warmup_linear",
+    "learning_rate_warmup": 0.002,
+    "l2": 0.01,
+    "vector_l2": "T",
+    "generate_sequences": "full",
+    "evaluate_sequences": "full",
+    "relation_format": "language",
+    "training_set_size": 100,
+    "development_set_versions_to_use": "12",
+    "max_event_1_size": 10,
+    "max_event_2_size": 15,
+    "eval_sampler": "greedy",
+    "iterations": 100000,
+    "learning_rate": 1e-5
+}
diff --git a/config/conceptnet/eval_changes.json b/config/conceptnet/eval_changes.json
@@ -0,0 +1,17 @@
+{
+
+    "base": {
+        "0": {
+            "gpu_index": 0
+        },
+        "1": {
+            "gpu_index": 1
+        },
+        "2": {
+            "gpu_index": 2
+        },
+        "3": {
+            "gpu_index": 3
+        }
+    }
+}
diff --git a/config/default.json b/config/default.json
@@ -0,0 +1,54 @@
+{
+    "gpu_mode": "T",
+    "gpu_index": 0,
+    "gpu_indices": [0, 1],
+    "multigpu": "F",
+
+    "topk_size": 10,
+    "beam_size": 1,
+    "gen_seqlength": 40,
+    "eval_sampler": "greedy",
+    "num_sequences": 1,
+    "generate_sequences": 1000,
+    "evaluate_sequences": 1000,
+
+    "random_seed": 123,
+    "optimizer": "adam",
+    "batch_size": 64,
+    "learning_rate": 6.25e-5,
+
+    "clip": 1,
+    "loss": "nll",
+    "weight_decay": 0,
+
+    "adam": {
+        "b2": 0.999,
+        "b1": 0.9,
+        "e": 1e-8
+    },
+
+    "model": "transformer",
+    "pretrain": "gpt",
+    "hidden_dim": 768,
+    "num_layers": 12,
+    "num_heads": 12,
+    "embedding_dropout": 0.1,
+    "attention_dropout": 0.1,
+    "residual_dropout": 0.1,
+    "output_dropout": 0.1,
+    "activation": "gelu",
+    "init": "pt",
+
+    "trainer": "iteration",
+
+    "iterations": 50000,
+    "cycle": 500,
+
+    "save_strategy": "best",
+
+    "epochs": 20,
+    "toy": "F",
+    "do_gen": "F",
+    "save": "T",
+    "test_save": "F"
+}
diff --git a/data/__init__.py b/data/__init__.py