diff --git a/src/NlpTools/Models/Lda.php b/src/NlpTools/Models/Lda.php index bec01d2..bb159f9 100644 --- a/src/NlpTools/Models/Lda.php +++ b/src/NlpTools/Models/Lda.php @@ -225,7 +225,7 @@ public function getPhi($limit_words=-1) * Get the probability of a document given a topic (theta according * to Griffiths and Steyvers) * - * @param $limit_docs Limit the results to the top n docs + * @param $limit_docs Limit the results to the top n docs * @return array A two dimensional array that contains the probabilities for each document */ public function getDocumentsPerTopicsProbabilities($limit_docs=-1) @@ -239,21 +239,28 @@ public function getDocumentsPerTopicsProbabilities($limit_docs=-1) $denom = $doccnt + $this->ntopics*$this->a; $count_topics_docs = array(); foreach ($this->count_docs_topics as $doc=>$topics) { - foreach ($topics as $t=>$c) - $count_topics_docs[$doc][$t]++; + foreach ($topics as $t=>$c) { + if (!isset($count_topics_docs[$doc])) { + $count_topics_docs[$doc] = array(); + } + if (!isset($count_topics_docs[$doc][$t])) { + $count_topics_docs[$doc][$t] = 0; + } + $count_topics_docs[$doc][$t]+=$c; + } } foreach ($p_t_d as $topic=>&$p) { foreach ($count_topics_docs as $doc=>$tc) { $p[$doc] = ($tc[$topic] + $this->a)/$denom; } - if ($limit_words>0) { + if ($limit_docs>0) { arsort($p); - $p = array_slice($p,0,$limit_words,true); // true to preserve the keys + $p = array_slice($p,0,$limit_docs,true); // true to preserve the keys } } - return $p; + return $p_t_d; } /**