@@ -24,11 +24,70 @@ class SearchModifiers(Enum):
24
24
HIDE_DLC = "hide_dlc"
25
25
26
26
27
class SearchInformations:
    """
    Search parameters scraped from the text of a HowLongToBeat JavaScript file.

    After construction:
    - api_key is the key found in the script, or None when no known format matched
    - search_url is the "/api/..." path of the fetch() call that carries that key,
      or None when no such call was found
    """
    search_url = None
    api_key = None

    # users: { id: "KEY" } inside the request json
    _USER_ID_KEY_RE = re.compile(r'users\s*:\s*{\s*id\s*:\s*"([^"]+)"')
    # "/api/word/".concat("X").concat("Y")... - at least one .concat() is required,
    # otherwise every plain "/api/word/" string of the script would be a candidate
    _CONCAT_KEY_RE = re.compile(r'\/api\/\w+\/"((?:\.concat\("[^"]*"\))+)')
    # A single .concat("X") call of the chain captured by _CONCAT_KEY_RE
    _CONCAT_KEY_PART_RE = re.compile(r'\.concat\("([^"]*)"\)')
    # fetch("/api/word/".concat("X").concat("Y"), ...
    _FETCH_ENDPOINT_RE = re.compile(
        r'fetch\(\s*["\'](\/api\/[^"\']*)["\']'  # Matches the endpoint
        r'((?:\s*\.concat\(\s*["\']([^"\']*)["\']\s*\))+)'  # Captures concatenated strings
        r'\s*,',  # Matches up to the comma
        re.DOTALL
    )
    # A single .concat('X') / .concat("X") call of the chain captured by _FETCH_ENDPOINT_RE
    _FETCH_CONCAT_PART_RE = re.compile(r'\.concat\(\s*["\']([^"\']*)["\']\s*\)')

    def __init__(self, script_content: str):
        """
        Extract the api key and the search url from the given script
        @param script_content: the text of the script to inspect
        """
        # The key must be extracted first: the url lookup compares against it
        self.api_key = self.__extract_api_from_script(script_content)
        self.search_url = self.__extract_search_url_script(script_content)
        # The path gets appended to BASE_URL; avoid a double slash when it ends with "/"
        if HTMLRequests.BASE_URL.endswith("/") and self.search_url is not None:
            self.search_url = self.search_url.lstrip("/")

    def __extract_api_from_script(self, script_content: str):
        """
        Function that extracts the hltb code to use in the request from the given script
        @param script_content: the text of the script to inspect
        @return: the string of the api key found, None if no format matched
        """
        # Try multiple finds one after the other as hltb keeps changing format
        # Test 1 - The API Key is in the user id in the request json
        # NOTE(review): every occurrence is joined into one key - confirm that
        # this is intended when the script holds more than one user id
        user_ids = self._USER_ID_KEY_RE.findall(script_content)
        if user_ids:
            return ''.join(user_ids)
        # Test 2 - The API Key is in format fetch("/api/[word here]/".concat("X").concat("Y")...
        # Each candidate is handled on its own so that two fetch calls never
        # get merged into a single key; the first non-empty key is returned
        for candidate in self._CONCAT_KEY_RE.finditer(script_content):
            key = ''.join(self._CONCAT_KEY_PART_RE.findall(candidate.group(1)))
            if key:
                return key
        # Unable to find :(
        return None

    def __extract_search_url_script(self, script_content: str):
        """
        Function that extracts the hltb search url to append from the script as /api/search
        Only a fetch call whose concatenated strings are equal to self.api_key is accepted
        @param script_content: the text of the script to inspect
        @return: the search url to append, None if no fetch call carries the api key
        """
        if self.api_key is None:
            # Nothing to compare against, no fetch call can be identified
            return None
        for fetch_call in self._FETCH_ENDPOINT_RE.finditer(script_content):
            endpoint = fetch_call.group(1)
            # Rebuild the string produced by the chain of .concat() calls
            concat_strings = self._FETCH_CONCAT_PART_RE.findall(fetch_call.group(2))
            # Check if the concatenated string matches the known key
            if ''.join(concat_strings) == self.api_key:
                return endpoint
        # Unable to find :(
        return None
83
+
84
+
27
85
class HTMLRequests :
28
86
BASE_URL = 'https://howlongtobeat.com/'
29
87
REFERER_HEADER = BASE_URL
30
- SEARCH_URL = BASE_URL + "api/s" # should update this to some kind of regex for api/[any alphanumeric characters here] to be more future proof since this keeps changing
31
88
GAME_URL = BASE_URL + "game"
89
+ # Static search url to use in case it can't be extracted from JS code
90
+ SEARCH_URL = BASE_URL + "api/s/"
32
91
33
92
@staticmethod
34
93
def get_search_request_headers ():
@@ -46,7 +105,7 @@ def get_search_request_headers():
46
105
return headers
47
106
48
107
@staticmethod
49
- def get_search_request_data (game_name : str , search_modifiers : SearchModifiers , page : int , api_key : str ):
108
+ def get_search_request_data (game_name : str , search_modifiers : SearchModifiers , page : int , search_info : SearchInformations ):
50
109
"""
51
110
Generate the data payload for the search request
52
111
@param game_name: The name of the game to search
@@ -96,8 +155,8 @@ def get_search_request_data(game_name: str, search_modifiers: SearchModifiers, p
96
155
}
97
156
98
157
# If api_key is passed add it to the dict
99
- if api_key is not None :
100
- payload ['searchOptions' ]['users' ]['id' ] = api_key
158
+ if search_info is not None and search_info . api_key is not None :
159
+ payload ['searchOptions' ]['users' ]['id' ] = search_info . api_key
101
160
102
161
return json .dumps (payload )
103
162
@@ -112,19 +171,21 @@ def send_web_request(game_name: str, search_modifiers: SearchModifiers = SearchM
112
171
@return: The HTML code of the research if the request returned 200(OK), None otherwise
113
172
"""
114
173
headers = HTMLRequests .get_search_request_headers ()
115
- api_key_result = HTMLRequests .send_website_request_getcode (False )
116
- if api_key_result is None :
117
- api_key_result = HTMLRequests .send_website_request_getcode (True )
174
+ search_info_data = HTMLRequests .send_website_request_getcode (False )
175
+ if search_info_data is None or search_info_data . api_key is None :
176
+ search_info_data = HTMLRequests .send_website_request_getcode (True )
118
177
# Make the request
178
+ if search_info_data .search_url is not None :
179
+ HTMLRequests .SEARCH_URL = HTMLRequests .BASE_URL + search_info_data .search_url
119
180
# The main method currently is the call to the API search URL
120
- search_url_with_key = HTMLRequests .SEARCH_URL + "/" + api_key_result
181
+ search_url_with_key = HTMLRequests .SEARCH_URL + search_info_data . api_key
121
182
payload = HTMLRequests .get_search_request_data (game_name , search_modifiers , page , None )
122
183
resp = requests .post (search_url_with_key , headers = headers , data = payload , timeout = 60 )
123
184
if resp .status_code == 200 :
124
185
return resp .text
125
186
# Try to call with the standard url adding the api key to the user
126
187
search_url = HTMLRequests .SEARCH_URL
127
- payload = HTMLRequests .get_search_request_data (game_name , search_modifiers , page , api_key_result )
188
+ payload = HTMLRequests .get_search_request_data (game_name , search_modifiers , page , search_info_data )
128
189
resp = requests .post (search_url , headers = headers , data = payload , timeout = 60 )
129
190
if resp .status_code == 200 :
130
191
return resp .text
@@ -141,20 +202,22 @@ async def send_async_web_request(game_name: str, search_modifiers: SearchModifie
141
202
@return: The HTML code of the research if the request returned 200(OK), None otherwise
142
203
"""
143
204
headers = HTMLRequests .get_search_request_headers ()
144
- api_key_result = await HTMLRequests .async_send_website_request_getcode (False )
145
- if api_key_result is None :
146
- api_key_result = await HTMLRequests .async_send_website_request_getcode (True )
205
+ search_info_data = HTMLRequests .send_website_request_getcode (False )
206
+ if search_info_data is None or search_info_data . api_key is None :
207
+ search_info_data = HTMLRequests .send_website_request_getcode (True )
147
208
# Make the request
209
+ if search_info_data .search_url is not None :
210
+ HTMLRequests .SEARCH_URL = HTMLRequests .BASE_URL + search_info_data .search_url
148
211
# The main method currently is the call to the API search URL
149
- search_url_with_key = HTMLRequests .SEARCH_URL + "/" + api_key_result
212
+ search_url_with_key = HTMLRequests .SEARCH_URL + search_info_data . api_key
150
213
payload = HTMLRequests .get_search_request_data (game_name , search_modifiers , page , None )
151
214
async with aiohttp .ClientSession () as session :
152
215
async with session .post (search_url_with_key , headers = headers , data = payload ) as resp_with_key :
153
216
if resp_with_key is not None and resp_with_key .status == 200 :
154
217
return await resp_with_key .text ()
155
218
else :
156
219
search_url = HTMLRequests .SEARCH_URL
157
- payload = HTMLRequests .get_search_request_data (game_name , search_modifiers , page , api_key_result )
220
+ payload = HTMLRequests .get_search_request_data (game_name , search_modifiers , page , search_info_data )
158
221
async with session .post (search_url , headers = headers , data = payload ) as resp_user_id :
159
222
if resp_user_id is not None and resp_user_id .status == 200 :
160
223
return await resp_user_id .text ()
@@ -240,30 +303,6 @@ async def async_get_game_title(game_id: int):
240
303
return HTMLRequests .__cut_game_title (text )
241
304
return None
242
305
243
- @staticmethod
244
- def extract_api_from_script (script_content : str ):
245
- """
246
- Function that extract the htlb code to use in the request from the given script
247
- @return: the string of the api key found
248
- """
249
- # Try multiple find one after the other as hltb keep changing format
250
- # Test 1 - The API Key is in the user id in the request json
251
- user_id_api_key_pattern = r'users\s*:\s*{\s*id\s*:\s*"([^"]+)"'
252
- matches = re .findall (user_id_api_key_pattern , script_content )
253
- if matches :
254
- key = '' .join (matches )
255
- return key
256
- # Test 2 - The API Key is in format fetch("/api/[word here]/".concat("X").concat("Y")...
257
- concat_api_key_pattern = r'\/api\/\w+\/"(?:\.concat\("[^"]*"\))*'
258
- matches = re .findall (concat_api_key_pattern , script_content )
259
- if matches :
260
- matches = str (matches ).split ('.concat' )
261
- matches = [re .sub (r'["\(\)\[\]\']' , '' , match ) for match in matches [1 :]]
262
- key = '' .join (matches )
263
- return key
264
- # Unable to find :(
265
- return None
266
-
267
306
@staticmethod
268
307
def send_website_request_getcode (parse_all_scripts : bool ):
269
308
"""
@@ -286,9 +325,10 @@ def send_website_request_getcode(parse_all_scripts: bool):
286
325
script_url = HTMLRequests .BASE_URL + script_url
287
326
script_resp = requests .get (script_url , headers = headers , timeout = 60 )
288
327
if script_resp .status_code == 200 and script_resp .text is not None :
289
- api_key_result = HTMLRequests .extract_api_from_script (script_resp .text )
290
- if api_key_result is not None :
291
- return api_key_result
328
+ search_info = SearchInformations (script_resp .text )
329
+ if search_info .api_key is not None :
330
+ # The api key is necessary
331
+ return search_info
292
332
return None
293
333
294
334
@staticmethod
@@ -317,9 +357,10 @@ async def async_send_website_request_getcode(parse_all_scripts: bool):
317
357
async with session .get (script_url , headers = headers ) as script_resp :
318
358
if script_resp is not None and resp .status == 200 :
319
359
script_resp_text = await script_resp .text ()
320
- api_key_result = HTMLRequests .extract_api_from_script (script_resp_text )
321
- if api_key_result is not None :
322
- return api_key_result
360
+ search_info = SearchInformations (script_resp_text )
361
+ if search_info .api_key is not None :
362
+ # The api key is necessary
363
+ return search_info
323
364
else :
324
365
return None
325
366
else :
0 commit comments