kertser committed on
Commit
666d8ca
1 Parent(s): 321551d

Upload WarOnline_Chat_test.py

Browse files

Fixed some bugs. This is a candidate for the main file version...

Files changed (1) hide show
  1. WarOnline_Chat_test.py +55 -38
WarOnline_Chat_test.py CHANGED
@@ -93,7 +93,7 @@ def post(message=message, thread_url=thread_url, post_url=post_url, quoted_by=""
93
 
94
  print('Post submitted successfully.')
95
 
96
- def messagesByUser(thread_url=thread_url, username="", startingPage=1):
97
  # Returns all the quotes for #username in the specific multi-page thread url
98
  allquotes =[]
99
 
@@ -109,6 +109,7 @@ def messagesByUser(thread_url=thread_url, username="", startingPage=1):
109
  namePattern = re.compile('data-lb-caption-desc="(.*?) ·')
110
  messageIDPattern = re.compile('data-lb-id="(.*?)"')
111
  quotedIDPattern = re.compile('data-source="(.*?)"')
 
112
 
113
  while not lastPage:
114
  response = requests.get(thread_url + 'page-' + str(page))
@@ -124,37 +125,41 @@ def messagesByUser(thread_url=thread_url, username="", startingPage=1):
124
  messageData = soup.find_all('div', {'class': 'message-userContent lbContainer js-lbContainer'})
125
 
126
  for data in messageData:
127
-
128
- if (username) in data.text:
129
- try:
130
- # Get the messager username
131
- matchName = namePattern.search(str(data))
132
- if matchName:
133
- messengerName = matchName.group(1)
134
-
135
- # Get the quoted ID
136
- matchID = quotedIDPattern.search(str(data))
137
- if matchID:
138
- quotedID = matchID.group(1)
139
-
140
- # Get the message ID
141
- matchID = messageIDPattern.search(str(data))
142
- if matchID:
143
- messageID = matchID.group(1)
144
-
145
- # Make sure that the messages have a quote inside
146
- blockquote = data.find('blockquote')
147
- if blockquote:
148
- # Extract the text
149
- text = data.find('div', {'class': 'bbWrapper'})
150
- for bq in text.find_all('blockquote'):
151
- bq.extract()
152
- reply = text.get_text().replace('\n', ' ').strip()
153
-
154
- allquotes.append({'reply': reply, 'messengerName': messengerName, 'messageID': messageID, 'quotedID': quotedID})
155
-
156
- except:
157
- continue # There was no text in quote, move next
 
 
 
 
158
 
159
  #check if that is not a last page
160
  if not compare_pages(thread_url + 'page-' + str(page), thread_url + 'page-' + str(page + 1)):
@@ -167,8 +172,12 @@ def messagesByUser(thread_url=thread_url, username="", startingPage=1):
167
  return allquotes
168
  def WarOnlineBot():
169
  # Get All Quotes by all users
170
- allMessages = messagesByUser(thread_url=thread_url, username="", startingPage=1)
171
- botMessages = messagesByUser(thread_url=thread_url, username=username, startingPage=1)
 
 
 
 
172
  for quote in quotes:
173
  pass
174
 
@@ -189,13 +198,21 @@ if __name__ == '__main__':
189
  login(username=username, password=password, thread_url=thread_url)
190
  print("logged in")
191
 
192
- ### Disabled. Will be enabled later on ###
193
  #post(message=message, thread_url=thread_url, post_url=post_url, quoted_by='Василий Пупкин', quote_text='quoted message', quote_source='3926006')
194
 
195
- quotes = messagesByUser(username=username)
196
- for quote in quotes:
197
- print(quote)
 
 
 
 
 
 
 
198
 
 
199
  """
200
  # Start the scheduler
201
  while True:
 
93
 
94
  print('Post submitted successfully.')
95
 
96
+ def getMessages(thread_url=thread_url, quotedUser="", startingPage=1):
97
  # Returns all messages that contain a quote (only those quoting quotedUser, if given) in the specific multi-page thread url
98
  allquotes =[]
99
 
 
109
  namePattern = re.compile('data-lb-caption-desc="(.*?) ·')
110
  messageIDPattern = re.compile('data-lb-id="(.*?)"')
111
  quotedIDPattern = re.compile('data-source="(.*?)"')
112
+ quotedNamePattern = re.compile('data-quote="(.*?)"')
113
 
114
  while not lastPage:
115
  response = requests.get(thread_url + 'page-' + str(page))
 
125
  messageData = soup.find_all('div', {'class': 'message-userContent lbContainer js-lbContainer'})
126
 
127
  for data in messageData:
128
+ try:
129
+ # Get the messenger username
130
+ matchName = namePattern.search(str(data))
131
+ if matchName:
132
+ messengerName = matchName.group(1)
133
+
134
+ # Get the quoted ID
135
+ matchID = quotedIDPattern.search(str(data))
136
+ if matchID:
137
+ quotedID = matchID.group(1)
138
+
139
+ # Get the message ID
140
+ matchID = messageIDPattern.search(str(data))
141
+ if matchID:
142
+ messageID = matchID.group(1)
143
+
144
+ matchQuotedName = quotedNamePattern.search(str(data))
145
+ if matchQuotedName:
146
+ quotedName = matchQuotedName.group(1)
147
+ if quotedUser and (quotedUser != quotedName):
148
+ continue
149
+
150
+ # Make sure that the messages have a quote inside
151
+ blockquote = data.find('blockquote')
152
+ if blockquote:
153
+ # Extract the text
154
+ text = data.find('div', {'class': 'bbWrapper'})
155
+ for bq in text.find_all('blockquote'):
156
+ bq.extract()
157
+ reply = text.get_text().replace('\n', ' ').strip()
158
+
159
+ allquotes.append({'reply': reply, 'messengerName': messengerName, 'messageID': messageID, 'quotedID': quotedID})
160
+
161
+ except:
162
+ continue # There was no text in quote, move next
163
 
164
  #check if that is not a last page
165
  if not compare_pages(thread_url + 'page-' + str(page), thread_url + 'page-' + str(page + 1)):
 
172
  return allquotes
173
  def WarOnlineBot():
174
  # Get All Quotes by all users
175
+ allMessages = getMessages(thread_url=thread_url, startingPage=1)
176
+ unrepliedMessages = []
177
+ for msg in allMessages:
178
+ if msg['messageID'].strip('-')[-1] != msg['quotedID'].strip(': ')[-1]:
179
+ unrepliedMessages.append(msg)
180
+
181
  for quote in quotes:
182
  pass
183
 
 
198
  login(username=username, password=password, thread_url=thread_url)
199
  print("logged in")
200
 
201
+ ### Disabled ###
202
  #post(message=message, thread_url=thread_url, post_url=post_url, quoted_by='Василий Пупкин', quote_text='quoted message', quote_source='3926006')
203
 
204
+ #messagedByBot = getMessages(thread_url=thread_url, quotedUser='WarBot', startingPage=1)
205
+ allMessages = getMessages(thread_url=thread_url, quotedUser='', startingPage=1)
206
+ repliedMessagesIDs = []
207
+
208
+ for msg in allMessages:
209
+ # Set a list of replied IDs
210
+ repliedMessagesIDs.append(msg['quotedID'].split(': ')[-1])
211
+ print(msg)
212
+ # remove empty elements
213
+ repliedMessagesIDs = [elem for elem in repliedMessagesIDs if elem]
214
 
215
+ print(repliedMessagesIDs)
216
  """
217
  # Start the scheduler
218
  while True: