22import functools
33import json
44import logging
5+ import math
56import os
67import time
78from functools import lru_cache
8- from typing import Callable , Dict , Iterator , List , Tuple , Type
9-
10- import diskcache # type: ignore
9+ from typing import Callable , Dict , List , Tuple , Type , Union
1110
1211# https://github.com/prius/python-leetcode
1312import leetcode .api .default_api # type: ignore
1615import leetcode .configuration # type: ignore
1716import leetcode .models .graphql_query # type: ignore
1817import leetcode .models .graphql_query_get_question_detail_variables # type: ignore
18+ import leetcode .models .graphql_query_problemset_question_list_variables # type: ignore
19+ import leetcode .models .graphql_query_problemset_question_list_variables_filter_input # type: ignore
20+ import leetcode .models .graphql_question_detail # type: ignore
1921import urllib3 # type: ignore
22+ from tqdm import tqdm
2023
2124CACHE_DIR = "cache"
2225
@@ -49,20 +52,6 @@ def _get_leetcode_api_client() -> leetcode.api.default_api.DefaultApi:
4952 return api_instance
5053
5154
52- def get_leetcode_task_handles () -> Iterator [Tuple [str , str , str ]]:
53- """
54- Get task handles for all the leetcode problems.
55- """
56- api_instance = _get_leetcode_api_client ()
57-
58- for topic in ["algorithms" , "database" , "shell" , "concurrency" ]:
59- api_response = api_instance .api_problems_topic_get (topic = topic )
60- for stat_status_pair in api_response .stat_status_pairs :
61- stat = stat_status_pair .stat
62-
63- yield (topic , stat .question__title , stat .question__title_slug )
64-
65-
6655def retry (times : int , exceptions : Tuple [Type [Exception ]], delay : float ) -> Callable :
6756 """
6857 Retry Decorator
@@ -98,44 +87,117 @@ class LeetcodeData:
9887 names.
9988 """
10089
def __init__(self, start: int, stop: int) -> None:
    """
    Initialize leetcode API and the in-memory cache for API responses.

    Args:
        start: lower bound of the problem range; stored as `self._start`.
        stop: upper bound of the problem range; stored as `self._stop`.

    Raises:
        ValueError: if `start` or `stop` is negative, or if `start` is
            greater than `stop`.
    """
    if start < 0:
        raise ValueError(f"Start must be non-negative: {start}")

    if stop < 0:
        # Report the offending value (`stop`), not `start`
        raise ValueError(f"Stop must be non-negative: {stop}")

    if start > stop:
        raise ValueError(f"Start ({start}) must be not greater than stop ({stop})")

    self._start = start
    self._stop = stop

    self._api_instance = _get_leetcode_api_client()

    # problem_slug -> question details; starts empty and is filled lazily
    # by whoever reads the cache first
    self._cache_container: Dict[
        str, leetcode.models.graphql_question_detail.GraphqlQuestionDetail
    ] = {}
111- @retry (times = 3 , exceptions = (urllib3 .exceptions .ProtocolError ,), delay = 5 )
112- async def _get_problem_data (self , problem_slug : str ) -> Dict [str , str ]:
@property
async def _cache(
    self,
) -> Dict[str, leetcode.models.graphql_question_detail.GraphqlQuestionDetail]:
    """
    Mapping (problem_slug -> question details), built on first use.

    While the stored container is empty, the problems are fetched through
    `_get_problems_data`, indexed by their `title_slug` and stored back on
    the instance. A non-empty container is returned as is.
    """
    # Fast path: something has already been loaded
    if self._cache_container:
        return self._cache_container

    fetched = await self._get_problems_data()
    self._cache_container = {item.title_slug: item for item in fetched}

    return self._cache_container
119128
@retry(times=3, exceptions=(urllib3.exceptions.ProtocolError,), delay=5)
async def _get_problems_count(self) -> int:
    """
    Ask the Leetcode GraphQL API for the total number of problems.

    Sends a `problemsetQuestionList` query limited to a single item and
    reads only `totalNum` from the answer. Returns 0 when the API reports
    no total.
    """
    variables_module = (
        leetcode.models.graphql_query_problemset_question_list_variables
    )
    filter_module = (
        leetcode.models.graphql_query_problemset_question_list_variables_filter_input
    )

    # No tag restriction. The original code also listed (commented out) the
    # filter fields difficulty, status, list_id and premium_only.
    filters = filter_module.GraphqlQueryProblemsetQuestionListVariablesFilterInput(
        tags=[],
    )

    request = leetcode.models.graphql_query.GraphqlQuery(
        query="""
            query problemsetQuestionList($categorySlug: String, $limit: Int, $skip: Int, $filters: QuestionListFilterInput) {
              problemsetQuestionList: questionList(
                categorySlug: $categorySlug
                limit: $limit
                skip: $skip
                filters: $filters
              ) {
                totalNum
              }
            }
        """,
        variables=variables_module.GraphqlQueryProblemsetQuestionListVariables(
            category_slug="",
            limit=1,
            skip=0,
            filters=filters,
        ),
        operation_name="problemsetQuestionList",
    )

    # Critical section. Don't allow more than one parallel request to
    # the Leetcode API
    async with leetcode_api_access_lock:
        # Leetcode has a rate limiter.
        # NOTE(review): time.sleep is a blocking call inside a coroutine;
        # confirm that stalling the event loop here is intended.
        time.sleep(2)
        payload = self._api_instance.graphql_post(body=request).data

    total = payload.problemset_question_list.total_num
    return total or 0
168+
169+ @retry (times = 3 , exceptions = (urllib3 .exceptions .ProtocolError ,), delay = 5 )
170+ async def _get_problems_data_page (
171+ self , offset : int , page_size : int , page : int
172+ ) -> List [leetcode .models .graphql_question_detail .GraphqlQuestionDetail ]:
173+ api_instance = self ._api_instance
174+ graphql_request = leetcode .models .graphql_query .GraphqlQuery (
175+ query = """
176+ query problemsetQuestionList($categorySlug: String, $limit: Int, $skip: Int, $filters: QuestionListFilterInput) {
177+ problemsetQuestionList: questionList(
178+ categorySlug: $categorySlug
179+ limit: $limit
180+ skip: $skip
181+ filters: $filters
182+ ) {
183+ total: totalNum
184+ questions: data {
127185 questionId
128186 questionFrontendId
129187 boundTopicId
130188 title
189+ titleSlug
190+ categoryTitle
191+ frequency
192+ freqBar
131193 content
132194 translatedTitle
133- translatedContent
134195 isPaidOnly
135196 difficulty
136197 likes
137198 dislikes
138199 isLiked
200+ isFavor
139201 similarQuestions
140202 contributors {
141203 username
@@ -158,42 +220,100 @@ async def _get_problem_data(self, problem_slug: str) -> Dict[str, str]:
158220 __typename
159221 }
160222 stats
223+ acRate
224+ codeDefinition
161225 hints
162226 solution {
163227 id
164228 canSeeDetail
165229 __typename
166230 }
231+ hasSolution
232+ hasVideoSolution
167233 status
168234 sampleTestCase
235+ enableRunCode
169236 metaData
237+ translatedContent
170238 judgerAvailable
171239 judgeType
172240 mysqlSchemas
173- enableRunCode
174241 enableTestMode
175242 envInfo
176243 __typename
177- }
178244 }
245+ }
246+ }
179247 """ ,
180- variables = leetcode .models .graphql_query_get_question_detail_variables .GraphqlQueryGetQuestionDetailVariables ( # noqa: E501
181- title_slug = problem_slug
248+ variables = leetcode .models .graphql_query_problemset_question_list_variables .GraphqlQueryProblemsetQuestionListVariables (
249+ category_slug = "" ,
250+ limit = page_size ,
251+ skip = offset + page * page_size ,
252+ filters = leetcode .models .graphql_query_problemset_question_list_variables_filter_input .GraphqlQueryProblemsetQuestionListVariablesFilterInput (),
182253 ),
183- operation_name = "getQuestionDetail " ,
254+ operation_name = "problemsetQuestionList " ,
184255 )
185256
186257 # Critical section. Don't allow more than one parallel request to
187258 # the Leetcode API
188259 async with leetcode_api_access_lock :
189260 time .sleep (2 ) # Leetcode has a rate limiter
190- data = api_instance .graphql_post (body = graphql_request ).data .question
191-
192- # Save data in the cache
193- self ._cache [problem_slug ] = data
261+ data = api_instance .graphql_post (
262+ body = graphql_request
263+ ).data .problemset_question_list .questions
194264
195265 return data
196266
async def _get_problems_data(
    self,
) -> List[leetcode.models.graphql_question_detail.GraphqlQuestionDetail]:
    """
    Fetch question details for the configured problem range, page by page.

    The range starts at `self._start` and ends at `self._stop` (clamped to
    the total problem count reported by the API), both inclusive, i.e.
    `stop - start + 1` problems. Pages hold at most 50 problems.

    Raises:
        ValueError: if `self._start` is greater than the total problem
            count.
    """
    problem_count = await self._get_problems_count()

    if self._start > problem_count:
        # The f-prefix matters: without it the placeholders are printed
        # literally instead of the actual values
        raise ValueError(
            f"Start ({self._start}) is greater than problems count ({problem_count})"
        )

    start = self._start
    stop = min(self._stop, problem_count)

    total = stop - start + 1
    page_size = min(50, total)

    problems: List[
        leetcode.models.graphql_question_detail.GraphqlQuestionDetail
    ] = []

    logging.info(f"Fetching {total} problems {page_size} per page")

    for page in tqdm(
        range(math.ceil(total / page_size)),
        unit="problem",
        unit_scale=page_size,
    ):
        data = await self._get_problems_data_page(start, page_size, page)
        problems.extend(data)

    # Every page (including the last one) is requested at full size, so the
    # final page may run past `stop`; keep only the requested range.
    return problems[:total]
297+
async def all_problems_handles(self) -> List[str]:
    """
    Return the handles (slugs) of every problem present in the cache.

    Example: ["two-sum", "three-sum"]
    """
    slug_to_details = await self._cache
    # Iterating a dict yields its keys
    return list(slug_to_details)
305+
async def _get_problem_data(
    self, problem_slug: str
) -> leetcode.models.graphql_question_detail.GraphqlQuestionDetail:
    """
    TODO: Legacy method. Needed in the old architecture. Can be replaced
    with direct cache calls later.

    Look up the question details for `problem_slug` in the cache.

    Raises:
        ValueError: if the slug is not present in the cache. Failing here
            avoids handing `None` to callers that immediately read
            attributes of the result.
    """
    cache = await self._cache
    try:
        return cache[problem_slug]
    except KeyError:
        raise ValueError(f"Problem {problem_slug} is not in cache") from None
316+
197317 async def _get_description (self , problem_slug : str ) -> str :
198318 """
199319 Problem description
@@ -296,3 +416,17 @@ async def freq_bar(self, problem_slug: str) -> float:
296416 """
297417 data = await self ._get_problem_data (problem_slug )
298418 return data .freq_bar or 0
419+
async def title(self, problem_slug: str) -> str:
    """
    Returns problem title

    The value is the `title` attribute of the problem data looked up for
    `problem_slug`.
    """
    data = await self._get_problem_data(problem_slug)
    return data.title
426+
async def category(self, problem_slug: str) -> str:
    """
    Returns problem category title

    The value is the `category_title` attribute of the problem data looked
    up for `problem_slug`.
    """
    data = await self._get_problem_data(problem_slug)
    return data.category_title
0 commit comments