Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

replaced all danfojs operations w/ tidyjs #8

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,077 changes: 64 additions & 1,013 deletions package-lock.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -7,13 +7,13 @@
"@testing-library/jest-dom": "^5.16.5",
"@testing-library/react": "^13.4.0",
"@testing-library/user-event": "^13.5.0",
"@tidyjs/tidy": "^2.5.2",
"@types/jest": "^27.5.2",
"@types/node": "^16.18.12",
"@types/react": "^18.0.28",
"@types/react-dom": "^18.0.11",
"await-to-js": "^3.0.0",
"bootstrap": "^5.3.0-alpha1",
"danfojs": "^1.1.2",
"echarts-for-react": "^3.0.2",
"react": "^18.2.0",
"react-dom": "^18.2.0",
25 changes: 14 additions & 11 deletions src/analysis/computeAccuracy.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
import { DataFrame } from "danfojs";
import { tidy, filter, tally } from "@tidyjs/tidy";
import { LongSessionRow } from "../types";

const computeAccuracy = (session: LongSessionRow[]): number => {
const correct: number = tidy(
session,
filter(({ correct }) => correct === true),
tally()
)[0]["n"];
const incorrect: number = tidy(
session,
filter(({ correct, key }) => correct === false && key !== "Backspace"),
tally()
)[0]["n"];

const computeAccuracy = (session: DataFrame): number => {
// get everything that isn't backspace
const mask = session["key"].values.map((k: string) => k !== "Backspace");
const correctMask = session.query(mask)["correct"].values;
const incorrectMask = session
.query(mask)
["correct"].values.map((v: boolean) => v === false);
const correct = session.query(correctMask).values.length;
const incorrect = session.query(incorrectMask).values.length;
// console.log(correctMask, incorrectMask);
return +(correct / (correct + incorrect)).toFixed(2);
};

84 changes: 40 additions & 44 deletions src/analysis/computeNGram.ts
Original file line number Diff line number Diff line change
@@ -1,31 +1,49 @@
import { DataFrame } from "danfojs";
import { nGramFeedback, WordPerformance } from "../types";
import { tidy, filter, select } from "@tidyjs/tidy";
import { createFinalOutput } from "../bin/createFinalOutput";
import { pullColumns } from "../bin/pullColumns";
import { LongSessionRow, nGramFeedback, WordPerformance } from "../types";

const transformIntoWords = (input: DataFrame): WordPerformance[] => {
// create list of words w/ array of targetID's
// Describes one n-gram found inside a target word.
// NOTE(review): the code that builds and consumes these objects is outside this view — confirm field semantics there.
interface NGramTarget {
// the n-gram text itself
nGram: string;
// presumably the position in the target where this n-gram begins — TODO confirm
startIndex: number;
// presumably the position in the target where this n-gram ends (inclusive vs. exclusive not visible here) — TODO confirm
endIndex: number;
}

const transformIntoWords = (input: LongSessionRow[]): WordPerformance[] => {
// create list of words w/ a list of the targetIDs of each instance of the word
const targets: { [key: string]: number[] } = {};
input.values.forEach((rv: any, rowIndex: number) => {
const row = input.iloc({ rows: [rowIndex] });
const { target: targetValues, targetID: targetIDValues } = row;
const target = targetValues.values[0];
const targetID = targetIDValues.values[0];
if (targets[target] === undefined) targets[target] = [targetID];
// create separate array of words so we can easily iterate through
const targetWords: string[] = [];
input.forEach((row: LongSessionRow) => {
const { target, targetID } = row;
// if this is a new word, then create a new key/value pair in the targets object
if (targets[target] === undefined) {
targets[target] = [targetID];
targetWords.push(target);
}
// only add targetID to array once
else if (targets[target].indexOf(targetID) < 0)
targets[target] = [...targets[target], targetID];
});

const words: WordPerformance[] = [];
Object.keys(targets).forEach((target) => {
// create array that contains each word and every attempt made to type the word
targetWords.forEach((target) => {
const thisWordPerf: WordPerformance = { target, attempts: [] };

targets[target].forEach((targetID) => {
const row = input.query(input["targetID"].eq(targetID));
const { correct, timestamp, key } = row;
const row = tidy(
input,
filter(({ targetID: searchID }) => searchID === targetID)
);
const [key, correct, timestamp] = pullColumns(
tidy(row, select(["key", "correct", "timestamp"])),
["key", "correct", "timestamp"]
) as [string[], boolean[], number[]];
thisWordPerf.attempts.push({
key: key.values,
correct: correct.values,
timestamps: timestamp.values,
key,
correct,
timestamp,
});
});
words.push(thisWordPerf);
@@ -34,29 +52,6 @@ const transformIntoWords = (input: DataFrame): WordPerformance[] => {
return words;
};

/** A key that survived editing, paired with the time it was pressed. */
interface FinalOutputItem {
  key: string;
  timestamp: number;
}

/**
 * Replays a sequence of key events and returns the final output, i.e. the key
 * events minus the ones that were subsequently removed by a "Backspace".
 *
 * @param keys - Keys in the order they were pressed; the literal string
 *   "Backspace" removes the most recent surviving key.
 * @param timestamp - Timestamps parallel to `keys` (same index, same event).
 * @returns The surviving keys, in order, each with its original timestamp.
 */
const createFinalOutput = (
  keys: string[],
  timestamp: number[]
): FinalOutputItem[] => {
  const out: FinalOutputItem[] = [];
  keys.forEach((k, i) => {
    if (k === "Backspace") {
      // drop the last surviving key in place; a no-op when nothing is left
      out.pop();
    } else {
      out.push({ key: k, timestamp: timestamp[i] });
    }
  });
  return out;
};

interface NGramTarget {
nGram: string;
startIndex: number;
endIndex: number;
}

// get corresponding performance data for a given nGram
const dumpNGrams = (
nGramsInTarget: NGramTarget[],
@@ -97,18 +92,18 @@ const createNGramsFromWords = (
}
);
const individualNGramPerformance = attempts.flatMap(
({ key, correct, timestamps }): nGramFeedback[] => {
({ key, correct, timestamp }): nGramFeedback[] => {
// if every key press was correct, then we can just dump that into the nGram object
// if there was a miss, we need to figure out what we can work with
// the simplest is when there's just a mistyped character but the string is still the same length as the target
if (correct.indexOf(false) <= -1 || target.length === key.length) {
return dumpNGrams(nGramsInTarget, correct, timestamps);
return dumpNGrams(nGramsInTarget, correct, timestamp);
}
// if there's backspaces, then we need to create the full visual history so we can calculate the nGram duration
const finalTyped = createFinalOutput(key, timestamps);
const finalTyped = createFinalOutput(key, timestamp);
// if the lengths are the same, then it can be treated like a simple mistype
if (finalTyped.length === target.length) {
return dumpNGrams(nGramsInTarget, correct, timestamps);
return dumpNGrams(nGramsInTarget, correct, timestamp);
}
// if the lengths differ, then there's probably a way we could handle it,
// but i don't want to implement it right now :-P
@@ -137,10 +132,11 @@ const createNGramsFromWords = (
};

const computeNGram = (
session: DataFrame,
session: LongSessionRow[],
nGramSize: number
): nGramFeedback[] => {
const words = transformIntoWords(session);
console.log(words);
const nGrams = createNGramsFromWords(words, nGramSize);
return nGrams;
};
90 changes: 43 additions & 47 deletions src/analysis/computeRunningWPM.ts
Original file line number Diff line number Diff line change
@@ -1,58 +1,54 @@
import { DataFrame } from "danfojs";
import {
tidy,
filter,
mutate,
groupBy,
summarize,
first,
last,
} from "@tidyjs/tidy";
import { LongSessionRow } from "../types";

const computeRunningWPM = (session: DataFrame): number[] => {
const computeRunningWPM = (session: LongSessionRow[]): number[] => {
// filter out incorrect key presses
const correctEvents = session.query(
session["key"].values.map((k: string) => k !== "Backspace")
const correctEvents = tidy(
session,
filter(({ key }) => key !== "Backspace")
);
// if there are not enough correct events, then stop and return -1
if (correctEvents.values.length < 5) return [-1];
if (correctEvents.length < 5) return [-1];
// create groups of 5 characters and measure how long each one took
const totalRows = correctEvents.shape[0];
// combine the last two groups so there's enough data
const remainder = totalRows % 5;
const wpmGroupLabelsWithoutRemainder = Array.from(Array(totalRows)).map(
(_, i) => Math.trunc(i / 5)
// to prevent having too small a last group, we'll only take n rows up to a number divisible by 5
const lastGroup = Math.trunc(correctEvents.length / 5);
let rowNum = 0;
const annotatedSession = tidy(
correctEvents,
mutate({
wpmGroup: () => {
const group = Math.trunc(rowNum / 5);
rowNum += 1;
return group;
},
}),
filter(({ wpmGroup }) => wpmGroup < lastGroup)
);
const wpmGroupLabels = [
...wpmGroupLabelsWithoutRemainder.slice(
0,
wpmGroupLabelsWithoutRemainder.length - remainder
),
...Array.from(Array(remainder)).map(() => Math.trunc(totalRows / 5) - 1),
];

const annotatedSession = correctEvents.addColumn("wpmGroup", wpmGroupLabels);
const maxLabel = wpmGroupLabels.slice(-1)[0];
// iterate through row groups and get duration
const runningWPMValues = Array.from(new Array(maxLabel)).map(
(_, i): number => {
// grab the timestamp col for the 5 character group
const mask = annotatedSession["wpmGroup"].eq(i);
let queryRes;
try {
queryRes = annotatedSession.query(mask);
} catch (err) {
console.error("failed to query dataframe!", {
err,
targetedWPMGroup: i,
mask,
annotatedSession,
});
return -1;
}
const group = queryRes["timestamp"];
// compute group duration in seconds
const tStart = group.min({ axis: 1 });
const tEnd = group.max({ axis: 1 });
// this is the duration it took to type 5 characters
const duration = (tEnd - tStart) / 60000;
// so we can extrapolate that out to 1 minute to get the instantaneous WPM
const groupWPM = duration * 6000;
return Math.round(groupWPM);
}
const runningWPMValues = tidy(
annotatedSession,
groupBy("wpmGroup", [
summarize({ start: first("timestamp"), end: last("timestamp") }),
]),
mutate({
// this is the time it took to type 5 characters ie one word
duration: ({ start, end }) => (end - start) / 1000,
}),
mutate({
// so we can extrapolate that out to 60 seconds to get WPM
wpm: ({ duration }) => Math.round(duration * 60),
})
);
return runningWPMValues;

return runningWPMValues.map(({ wpm }) => wpm);
};

export default computeRunningWPM;
13 changes: 7 additions & 6 deletions src/analysis/computeWPM.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
import { DataFrame } from "danfojs";
import { tidy, select } from "@tidyjs/tidy";
import { LongSessionRow } from "../types";

const computeWPM = (session: DataFrame): number => {
const first = session.head(1)["timestamp"];
const last = session.tail(1)["timestamp"];
const computeWPM = (session: LongSessionRow[]): number => {
const first = session[0]["timestamp"];
const last = session[session.length - 1]["timestamp"];
// get the total duration of the test
const duration = last.sub(first).values[0];
const duration = last - first;
// divide to get minutes
const durationInMin = duration / 60000;
// get the number of correct characters typed
const chars = session.query(session["correct"]).shape[0];
const chars = tidy(session, select("correct")).length;
// assume an average word is 5 characters
const words = chars / 5;
const wpm = words / durationInMin;
5 changes: 2 additions & 3 deletions src/analysis/sessionPostProcessing.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import * as dfd from "danfojs";
import { v4 as uuidv4 } from "uuid";
import { LongSessionRow, Word } from "../types";
import computeAccuracy from "./computeAccuracy";
@@ -25,14 +24,14 @@ export const dumpSession = async (session: Word[]): Promise<void> => {
};

// create a dataframe where each typing event takes up one row
export const pivotSessionLong = (session: Word[]): dfd.DataFrame => {
export const pivotSessionLong = (session: Word[]): LongSessionRow[] => {
const rows = session.flatMap(({ target, history }) =>
history.map((e) => ({
target,
...e,
}))
) as LongSessionRow[];

const out = new dfd.DataFrame(rows);
const out = rows;
return out;
};
16 changes: 16 additions & 0 deletions src/bin/createFinalOutput.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
/** A key that survived editing, paired with the time it was pressed. */
interface FinalOutputItem {
  key: string;
  timestamp: number;
}

/**
 * Replays a sequence of key events and returns the final output, i.e. the key
 * events minus the ones that were subsequently removed by a "Backspace".
 *
 * @param keys - Keys in the order they were pressed; the literal string
 *   "Backspace" removes the most recent surviving key.
 * @param timestamp - Timestamps parallel to `keys` (same index, same event).
 * @returns The surviving keys, in order, each with its original timestamp.
 */
export const createFinalOutput = (
  keys: string[],
  timestamp: number[]
): FinalOutputItem[] => {
  const out: FinalOutputItem[] = [];
  keys.forEach((k, i) => {
    if (k === "Backspace") {
      // drop the last surviving key in place; a no-op when nothing is left
      out.pop();
    } else {
      out.push({ key: k, timestamp: timestamp[i] });
    }
  });
  return out;
};
12 changes: 12 additions & 0 deletions src/bin/pullColumns.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
/**
 * Pivots an array of row objects into per-column arrays.
 *
 * Rows are assumed to share the same keys; a property that is absent on a row
 * shows up as `undefined` at that row's position.
 *
 * @param items - Row objects to read from.
 * @param columns - Property names to extract; the output follows this order.
 * @returns One array per entry of `columns`, each holding that property's
 *   value from every item, in item order. Every entry gets its own array, so
 *   a column name listed twice yields two independent, correctly sized arrays.
 */
export const pullColumns = (
  items: readonly Object[],
  columns: readonly string[]
): any[][] =>
  columns.map((column) =>
    // rows are only known to be objects here, so read the property dynamically
    items.map((item) => (item as { [key: string]: any })[column])
  );
4 changes: 2 additions & 2 deletions src/components/Accuracy.tsx
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { DataFrame } from "danfojs";
import computeAccuracy from "../analysis/computeAccuracy";
import { LongSessionRow } from "../types";

const Accuracy = ({ session }: { session: DataFrame }) => {
const Accuracy = ({ session }: { session: LongSessionRow[] }) => {
return (
<div className="analysis-stat">
Accuracy: {(100 * computeAccuracy(session)).toFixed(0)}%
22 changes: 12 additions & 10 deletions src/components/NGram.tsx
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import { DataFrame } from "danfojs";
import { Series } from "danfojs/dist/danfojs-base";
import { tidy, mean, summarize, filter } from "@tidyjs/tidy";
import computeNGram from "../analysis/computeNGram";
import { nGramFeedback } from "../types";
import { LongSessionRow, nGramFeedback } from "../types";

interface ProcessedNGramData {
nGram: string;
@@ -12,11 +11,14 @@ interface ProcessedNGramData {

const processNGramData = (nGrams: nGramFeedback[]): ProcessedNGramData[] =>
nGrams.map(({ nGram, performance }) => {
const meanDuration = Math.round(
new Series(performance.map(({ duration }) => duration)).mean()
);
const correctData = performance.flatMap(
({ correct }) => correct.indexOf(false) <= -1
const meanDuration = tidy(
performance,
summarize({ meanDuration: mean("duration") })
)[0]["meanDuration"] as number;

const correctData = tidy(
performance,
filter(({ correct }) => correct.indexOf(false) <= -1)
);
const meanAccuracy =
correctData.filter((e) => e).length / correctData.length;
@@ -42,12 +44,12 @@ const displayNGramData = (
<tr>
<th scope="row">{nGram}</th>
<td>{nGramCount}</td>
<td>{meanDuration}ms</td>
<td>{Math.round(meanDuration)}ms</td>
<td>{Math.round(meanAccuracy * 100)}%</td>
</tr>
));

const NGram = ({ session }: { session: DataFrame }) => {
const NGram = ({ session }: { session: LongSessionRow[] }) => {
const sortBy: NGramSortables = "nGramCount";
const nRows = 10;

8 changes: 6 additions & 2 deletions src/components/RunningWPM.tsx
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
import { ReactElement } from "react";
import ECharts from "echarts-for-react";
import { DataFrame } from "danfojs";
import computeRunningWPM from "../analysis/computeRunningWPM";
import { LongSessionRow } from "../types";

const RunningWPM = ({ session }: { session: DataFrame }): ReactElement => {
const RunningWPM = ({
session,
}: {
session: LongSessionRow[];
}): ReactElement => {
// blah
const runningWPM = computeRunningWPM(session);
const options = {
4 changes: 2 additions & 2 deletions src/components/WPM.tsx
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { DataFrame } from "danfojs";
import computeWPM from "../analysis/computeWPM";
import { LongSessionRow } from "../types";

const WPM = ({ session }: { session: DataFrame }) => {
const WPM = ({ session }: { session: LongSessionRow[] }) => {
const wpm = computeWPM(session);
return <div className="analysis-stat">WPM: {wpm}</div>;
};
5 changes: 2 additions & 3 deletions src/routes/Analysis.tsx
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import { DataFrame } from "danfojs";
import { useEffect, useState } from "react";
import { Link } from "react-router-dom";
import { RxDocument } from "rxdb";
@@ -9,7 +8,7 @@ import NGram from "../components/NGram";
import RunningWPM from "../components/RunningWPM";
import WPM from "../components/WPM";
import "../styles/Analysis.scss";
import { Word } from "../types";
import { LongSessionRow, Word } from "../types";
import { SessionType } from "../types/sessionSchema";

const Vis = () => {
@@ -18,7 +17,7 @@ const Vis = () => {
Function
] = useState([]);
const [selectedSession, setSelectedSession]: [
DataFrame | undefined,
LongSessionRow[] | undefined,
Function
] = useState();
const [selectedSessionUUID, setSelectedSessionUUID] = useState("");
2 changes: 1 addition & 1 deletion src/types/index.ts
Original file line number Diff line number Diff line change
@@ -30,7 +30,7 @@ export interface WordPerformance {
// array of whether the right key was pressed
correct: boolean[];
// array of timestamps so we can know how long the word took to type
timestamps: number[];
timestamp: number[];
}[];
}