From ad88df681b05706b971ddd14875d671a0cc58cf1 Mon Sep 17 00:00:00 2001 From: Slava Vishnyakov Date: Sat, 21 Apr 2018 13:02:01 +0300 Subject: [PATCH 1/6] First fix for #64 --- src/NlpTools/Models/Lda.php | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/NlpTools/Models/Lda.php b/src/NlpTools/Models/Lda.php index bec01d2..66e374d 100644 --- a/src/NlpTools/Models/Lda.php +++ b/src/NlpTools/Models/Lda.php @@ -239,8 +239,15 @@ public function getDocumentsPerTopicsProbabilities($limit_docs=-1) $denom = $doccnt + $this->ntopics*$this->a; $count_topics_docs = array(); foreach ($this->count_docs_topics as $doc=>$topics) { - foreach ($topics as $t=>$c) - $count_topics_docs[$doc][$t]++; + foreach ($topics as $t=>$c) { + if (!isset($count_topics_docs[$doc])) { + $count_topics_docs[$doc] = array(); + } + if (!isset($count_topics_docs[$doc][$t])) { + $count_topics_docs[$doc][$t] = 0; + } + $count_topics_docs[$doc][$t]++; + } } foreach ($p_t_d as $topic=>&$p) { From d72071cbc1821edae8b6d7f2d591969283a6138a Mon Sep 17 00:00:00 2001 From: Slava Vishnyakov Date: Sat, 21 Apr 2018 13:05:17 +0300 Subject: [PATCH 2/6] Fix unused variable --- src/NlpTools/Models/Lda.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/NlpTools/Models/Lda.php b/src/NlpTools/Models/Lda.php index bec01d2..4459da3 100644 --- a/src/NlpTools/Models/Lda.php +++ b/src/NlpTools/Models/Lda.php @@ -225,10 +225,10 @@ public function getPhi($limit_words=-1) * Get the probability of a document given a topic (theta according * to Griffiths and Steyvers) * - * @param $limit_docs Limit the results to the top n docs + * @param $limit_words Limit the results to the top n words * @return array A two dimensional array that contains the probabilities for each document */ - public function getDocumentsPerTopicsProbabilities($limit_docs=-1) + public function getDocumentsPerTopicsProbabilities($limit_words=-1) { $p_t_d = array_fill_keys( range(0,$this->ntopics-1), From 
a1f71b9dd71eaf8d72bff7246daffb6d56a444f6 Mon Sep 17 00:00:00 2001 From: Slava Vishnyakov Date: Sat, 21 Apr 2018 13:08:29 +0300 Subject: [PATCH 3/6] Fix unused variable --- src/NlpTools/Models/Lda.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/NlpTools/Models/Lda.php b/src/NlpTools/Models/Lda.php index 4459da3..71361aa 100644 --- a/src/NlpTools/Models/Lda.php +++ b/src/NlpTools/Models/Lda.php @@ -259,9 +259,9 @@ public function getDocumentsPerTopicsProbabilities($limit_words=-1) /** * Shortcut to getDocumentsPerTopicsProbabilities */ - public function getTheta($limit_docs=-1) + public function getTheta($limit_words=-1) { - return $this->getDocumentsPerTopicsProbabilities($limit_docs); + return $this->getDocumentsPerTopicsProbabilities($limit_words); } /** From 6bcf1885ed3428313c24563280f4f1198194abe5 Mon Sep 17 00:00:00 2001 From: Slava Vishnyakov Date: Sat, 21 Apr 2018 13:25:50 +0300 Subject: [PATCH 4/6] Fix algorithm --- src/NlpTools/Models/Lda.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/NlpTools/Models/Lda.php b/src/NlpTools/Models/Lda.php index bec01d2..efde3cf 100644 --- a/src/NlpTools/Models/Lda.php +++ b/src/NlpTools/Models/Lda.php @@ -240,7 +240,7 @@ public function getDocumentsPerTopicsProbabilities($limit_docs=-1) $count_topics_docs = array(); foreach ($this->count_docs_topics as $doc=>$topics) { foreach ($topics as $t=>$c) - $count_topics_docs[$doc][$t]++; + $count_topics_docs[$doc][$t]+=$c; } foreach ($p_t_d as $topic=>&$p) { @@ -253,7 +253,7 @@ public function getDocumentsPerTopicsProbabilities($limit_docs=-1) } } - return $p; + return $p_t_d; } /** From 37ff7f46fa0806f863f6af661285c0303dd72ba1 Mon Sep 17 00:00:00 2001 From: Slava Vishnyakov Date: Sat, 21 Apr 2018 13:32:14 +0300 Subject: [PATCH 5/6] Wrong fix --- src/NlpTools/Models/Lda.php | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/NlpTools/Models/Lda.php b/src/NlpTools/Models/Lda.php index 
71361aa..8a2b73b 100644 --- a/src/NlpTools/Models/Lda.php +++ b/src/NlpTools/Models/Lda.php @@ -225,10 +225,10 @@ public function getPhi($limit_words=-1) * Get the probability of a document given a topic (theta according * to Griffiths and Steyvers) * - * @param $limit_words Limit the results to the top n words + * @param $limit_docs Limit the results to the top n docs * @return array A two dimensional array that contains the probabilities for each document */ - public function getDocumentsPerTopicsProbabilities($limit_words=-1) + public function getDocumentsPerTopicsProbabilities($limit_docs=-1) { $p_t_d = array_fill_keys( range(0,$this->ntopics-1), @@ -247,9 +247,9 @@ public function getDocumentsPerTopicsProbabilities($limit_words=-1) foreach ($count_topics_docs as $doc=>$tc) { $p[$doc] = ($tc[$topic] + $this->a)/$denom; } - if ($limit_words>0) { + if ($limit_docs>0) { arsort($p); - $p = array_slice($p,0,$limit_words,true); // true to preserve the keys + $p = array_slice($p,0,$limit_docs,true); // true to preserve the keys } } @@ -259,9 +259,9 @@ public function getDocumentsPerTopicsProbabilities($limit_words=-1) /** * Shortcut to getDocumentsPerTopicsProbabilities */ - public function getTheta($limit_words=-1) + public function getTheta($limit_docs=-1) { - return $this->getDocumentsPerTopicsProbabilities($limit_words); + return $this->getDocumentsPerTopicsProbabilities($limit_docs); } /** From 571c172285e0e70f44422334ecaaa6da1b35c6ce Mon Sep 17 00:00:00 2001 From: Slava Vishnyakov Date: Sat, 21 Apr 2018 13:46:03 +0300 Subject: [PATCH 6/6] Fix adding --- src/NlpTools/Models/Lda.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/NlpTools/Models/Lda.php b/src/NlpTools/Models/Lda.php index 0382690..bb159f9 100644 --- a/src/NlpTools/Models/Lda.php +++ b/src/NlpTools/Models/Lda.php @@ -246,7 +246,7 @@ public function getDocumentsPerTopicsProbabilities($limit_docs=-1) if (!isset($count_topics_docs[$doc][$t])) { $count_topics_docs[$doc][$t] = 0; 
} - $count_topics_docs[$doc][$t]++; + $count_topics_docs[$doc][$t]+=$c; } }