4
4
import re
5
5
import html
6
6
import json
7
- from bs4 import BeautifulSoup
8
7
from enum import Enum
8
+ from bs4 import BeautifulSoup
9
9
import aiohttp
10
10
import requests
11
11
from fake_useragent import UserAgent
@@ -100,7 +100,7 @@ def send_web_request(game_name: str, search_modifiers: SearchModifiers = SearchM
100
100
api_key_result = HTMLRequests .send_website_request_getcode (True )
101
101
# Make the POST request and return the result if it is valid
102
102
search_url_with_key = HTMLRequests .SEARCH_URL + "/" + api_key_result
103
- resp = requests .post (search_url_with_key , headers = headers , data = payload )
103
+ resp = requests .post (search_url_with_key , headers = headers , data = payload , timeout = 60 )
104
104
if resp .status_code == 200 :
105
105
return resp .text
106
106
return None
@@ -129,7 +129,7 @@ async def send_async_web_request(game_name: str, search_modifiers: SearchModifie
129
129
return None
130
130
131
131
@staticmethod
132
- def __cut_game_title (game_title : str ):
132
+ def __cut_game_title (page_source : str ):
133
133
"""
134
134
Function that extracts the game title from the HTML title of the HowLongToBeat page
135
135
@param page_source: The HTML source of the HowLongToBeat page of the game
@@ -138,12 +138,15 @@ def __cut_game_title(game_title: str):
138
138
(So, in this example: "A Way Out")
139
139
"""
140
140
141
- if game_title is None or len (game_title ) == 0 :
141
+ if page_source is None or len (page_source ) == 0 :
142
142
return None
143
143
144
- title = re .search ("<title>(.*)<\/title>" , game_title )
144
+ soup = BeautifulSoup (page_source , 'html.parser' )
145
+ title_tag = soup .title
146
+ title_text = title_tag .string
147
+
145
148
# The start and end offsets of this slice may need to change if the website changes its title format
146
- cut_title = str ( html . unescape ( title . group ( 1 ) [12 :- 17 ]) )
149
+ cut_title = title_text [12 :- 17 ]. strip ( )
147
150
return cut_title
148
151
149
152
@staticmethod
@@ -183,7 +186,7 @@ def get_game_title(game_id: int):
183
186
headers = HTMLRequests .get_title_request_headers ()
184
187
185
188
# Request and extract title
186
- contents = requests .get (HTMLRequests .GAME_URL , params = params , headers = headers )
189
+ contents = requests .get (HTMLRequests .GAME_URL , params = params , headers = headers , timeout = 60 )
187
190
return HTMLRequests .__cut_game_title (contents .text )
188
191
189
192
@staticmethod
@@ -213,7 +216,7 @@ def send_website_request_getcode(parse_all_scripts: bool):
213
216
"""
214
217
# Make the GET request and process the result if it is valid
215
218
headers = HTMLRequests .get_title_request_headers ()
216
- resp = requests .get (HTMLRequests .BASE_URL , headers = headers )
219
+ resp = requests .get (HTMLRequests .BASE_URL , headers = headers , timeout = 60 )
217
220
if resp .status_code == 200 and resp .text is not None :
218
221
# Parse the HTML content using BeautifulSoup
219
222
soup = BeautifulSoup (resp .text , 'html.parser' )
@@ -225,7 +228,7 @@ def send_website_request_getcode(parse_all_scripts: bool):
225
228
matching_scripts = [script ['src' ] for script in scripts if '_app-' in script ['src' ]]
226
229
for script_url in matching_scripts :
227
230
script_url = HTMLRequests .BASE_URL + script_url
228
- script_resp = requests .get (script_url , headers = headers )
231
+ script_resp = requests .get (script_url , headers = headers , timeout = 60 )
229
232
if script_resp .status_code == 200 and script_resp .text is not None :
230
233
pattern = r'"/api/search/".concat\("([a-zA-Z0-9]+)"\)'
231
234
matches = re .findall (pattern , script_resp .text )
0 commit comments