2 changes: 2 additions & 0 deletions README.md
@@ -19,6 +19,7 @@ RLCard is a toolkit for Reinforcement Learning (RL) in card games. It supports m
* Dou Dizhu Demo: [Demo](https://douzero.org/)
* Resources: [Awesome-Game-AI](https://github.com/datamllab/awesome-game-ai)
* Related Project: [DouZero Project](https://github.com/kwai/DouZero)
* Zhihu: https://zhuanlan.zhihu.com/p/526723604

**Community:**
* **Slack**: Discuss in our [#rlcard-project](https://join.slack.com/t/rlcard/shared_invite/zt-rkvktsaq-xkMwz8BfKupCM6zGhO01xg) slack channel.
@@ -27,6 +28,7 @@ RLCard is a toolkit for Reinforcement Learning (RL) in card games. It supports m
* Group 2: 117349516

**News:**
* We have updated the tutorials in Jupyter Notebook to help you walk through RLCard! Please check [RLCard Tutorial](https://github.com/datamllab/rlcard-tutorial).
* All the algorithms now support [PettingZoo](https://github.com/PettingZoo-Team/PettingZoo). Please check [here](examples/pettingzoo). Thanks to [Yifei Cheng](https://github.com/ycheng517) for the contribution.
* Please follow [DouZero](https://github.com/kwai/DouZero), a strong Dou Dizhu AI and the [ICML 2021 paper](https://arxiv.org/abs/2106.06135). An online demo is available [here](https://douzero.org/). The algorithm is also integrated in RLCard. See [Training DMC on Dou Dizhu](docs/toy-examples.md#training-dmc-on-dou-dizhu).
* Our package is used in [PettingZoo](https://github.com/PettingZoo-Team/PettingZoo). Please check it out!
2 changes: 2 additions & 0 deletions README.zh-CN.md
@@ -19,6 +19,7 @@ RLCard is a toolkit for Reinforcement Learning (RL) in card games
* Dou Dizhu Demo: [Demo](https://douzero.org/)
* Resources: [Awesome-Game-AI](https://github.com/datamllab/awesome-game-ai)
* Related Project: [DouZero Project](https://github.com/kwai/DouZero)
* Zhihu: [https://zhuanlan.zhihu.com/p/526723604](https://zhuanlan.zhihu.com/p/526723604)

**Community:**
* **Slack**: Discuss in our [#rlcard-project](https://join.slack.com/t/rlcard/shared_invite/zt-rkvktsaq-xkMwz8BfKupCM6zGhO01xg) Slack channel.
@@ -27,6 +28,7 @@ RLCard is a toolkit for Reinforcement Learning (RL) in card games
* Group 2: 117349516

**News:**
* We have updated the Jupyter Notebook tutorials to help you quickly get to know RLCard! Please check the [RLCard Tutorial](https://github.com/datamllab/rlcard-tutorial).
* All the algorithms now support the [PettingZoo](https://github.com/PettingZoo-Team/PettingZoo) interface. Please click [here](examples/pettingzoo). Thanks to [Yifei Cheng](https://github.com/ycheng517) for the contribution.
* Please follow [DouZero](https://github.com/kwai/DouZero), a strong Dou Dizhu AI, and the [ICML 2021 paper](https://arxiv.org/abs/2106.06135). An online demo is available [here](https://douzero.org/). The algorithm is also integrated into RLCard; see [Training DMC on Dou Dizhu](docs/toy-examples.md#training-dmc-on-dou-dizhu).
* Our package is used in [PettingZoo](https://github.com/PettingZoo-Team/PettingZoo). Please check it out!
63 changes: 33 additions & 30 deletions examples/human/nolimit_holdem_human.py
@@ -6,46 +6,49 @@
from rlcard import models
from rlcard.agents import NolimitholdemHumanAgent as HumanAgent
from rlcard.utils import print_card

# Make environment
env = rlcard.make('no-limit-holdem')

human_agent = HumanAgent(env.num_actions)
human_agent2 = HumanAgent(env.num_actions)
human_agent3 = HumanAgent(env.num_actions)
human_agent4 = HumanAgent(env.num_actions)
# random_agent = RandomAgent(num_actions=env.num_actions)

env.set_agents([human_agent, human_agent2])
env.set_agents([human_agent, human_agent2, human_agent3, human_agent4])
Member:
Why did you make it 4 players instead of 2?



while (True):
print(">> Start a new game")

trajectories, payoffs = env.run(is_training=False)
# If the human does not take the final action, we need to
# print other players action
final_state = trajectories[0][-1]
action_record = final_state['action_record']
state = final_state['raw_obs']
_action_list = []
for i in range(1, len(action_record)+1):
if action_record[-i][0] == state['current_player']:
break
_action_list.insert(0, action_record[-i])
for pair in _action_list:
print('>> Player', pair[0], 'chooses', pair[1])

# Let's take a look at what the agent card is
print('=============== Cards all Players ===============')
for hands in env.get_perfect_information()['hand_cards']:
print_card(hands)

print('=============== Result ===============')
if payoffs[0] > 0:
print('You win {} chips!'.format(payoffs[0]))
elif payoffs[0] == 0:
print('It is a tie.')
else:
print('You lose {} chips!'.format(-payoffs[0]))
print('')
while(True):
trajectories, payoffs = env.run(is_training=False)
# If the human does not take the final action, we need to
# print other players action
final_state = trajectories[0][-1]
action_record = final_state['action_record']
state = final_state['raw_obs']
_action_list = []
for i in range(1, len(action_record)+1):
if action_record[-i][0] == state['current_player']:
break
_action_list.insert(0, action_record[-i])
for pair in _action_list:
print('>> Player', pair[0], 'chooses', pair[1])

# Let's take a look at what the agent card is
print('=============== Cards all Players ===============')
for hands in env.get_perfect_information()['hand_cards']:
print_card(hands)

print('=============== Result ===============')
print(payoffs)
# if payoffs[0] > 0:
# print('You win {} chips!'.format(payoffs[0]))
# elif payoffs[0] == 0:
# print('It is a tie.')
# else:
# print('You lose {} chips!'.format(-payoffs[0]))
# print('')
break

input("Press any key to continue...")
2 changes: 1 addition & 1 deletion examples/run_rl.py
@@ -125,7 +125,7 @@ def train(args):
parser.add_argument(
'--cuda',
type=str,
default='',
default='0',
Member:
Could you make '' the default, since some users may not have a GPU?
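
A minimal sketch (not the exact run_rl.py code) of why '' is the safer default, assuming the script exports the flag to CUDA_VISIBLE_DEVICES before creating a torch device: with '' every GPU is hidden and the run falls back to CPU, while '0' assumes a GPU exists.

import argparse
import os
import torch

parser = argparse.ArgumentParser()
parser.add_argument('--cuda', type=str, default='')  # '' keeps CPU-only machines working
args = parser.parse_args()

# Export the flag before any CUDA call: '' hides every GPU, so the device falls back to CPU.
os.environ['CUDA_VISIBLE_DEVICES'] = args.cuda
device = torch.device('cuda:0' if args.cuda != '' and torch.cuda.is_available() else 'cpu')
print(device)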

)
parser.add_argument(
'--seed',
13 changes: 9 additions & 4 deletions rlcard/agents/human_agents/nolimit_holdem_human_agent.py
@@ -1,5 +1,5 @@
from rlcard.utils.utils import print_card

from rlcard.games.nolimitholdem.round import Action

class HumanAgent(object):
''' A human agent for No Limit Holdem. It can be used to play against trained models
@@ -26,10 +26,16 @@ def step(state):
'''
_print_state(state['raw_obs'], state['action_record'])
action = int(input('>> You choose action (integer): '))
while action < 0 or action >= len(state['legal_actions']):
amt = 0
if state['raw_legal_actions'][action] == Action.RAISE:
amt = int(input('>> Choose your raise amount: '))
while action < 0 or action >= len(state['legal_actions']) or\
(state['raw_legal_actions'][action] == Action.RAISE and amt < state['last_raise'] * 2):
print('Action illegal...')
action = int(input('>> Re-choose action (integer): '))
return state['raw_legal_actions'][action]
if state['raw_legal_actions'][action] == Action.RAISE:
amt = int(input('>> Choose your raise amount: '))
return state['raw_legal_actions'][action], amt

def eval_step(self, state):
''' Predict the action given the curent state for evaluation. The same to step here.
@@ -70,4 +76,3 @@ def _print_state(state, action_record):
print('\n=========== Actions You Can Choose ===========')
print(', '.join([str(index) + ': ' + str(action) for index, action in enumerate(state['legal_actions'])]))
print('')
print(state)
2 changes: 2 additions & 0 deletions rlcard/envs/env.py
@@ -163,6 +163,8 @@ def run(self, is_training=False):
state = self.get_state(player_id)
trajectories[player_id].append(state)

Member:
I believe we don't want to print trajectories in this method, which will affect all the games.

Author:
Sure. I should set this to WIP! I will resolve the status once the feature is done.
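
A minimal sketch of the caller-side alternative, assuming the standard rlcard example setup; printing after env.run returns keeps Env.run itself silent for every other game.

import rlcard
from rlcard.agents import RandomAgent

env = rlcard.make('no-limit-holdem')
env.set_agents([RandomAgent(num_actions=env.num_actions) for _ in range(env.num_players)])

# Print the trajectory in the example script, not inside Env.run.
trajectories, payoffs = env.run(is_training=False)
print(trajectories[0])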

print(trajectories[0])

# Payoffs
payoffs = self.get_payoffs()

2 changes: 1 addition & 1 deletion rlcard/envs/leducholdem.py
@@ -61,7 +61,7 @@ def _extract_state(self, state):
if public_card:
obs[self.card2index[public_card]+3] = 1
obs[state['my_chips']+6] = 1
obs[state['all_chips'][1]+20] = 1
obs[sum(state['all_chips'])-state['my_chips']+21] = 1
extracted_state['obs'] = obs

extracted_state['raw_obs'] = state
5 changes: 3 additions & 2 deletions rlcard/envs/nolimitholdem.py
@@ -9,7 +9,7 @@
from rlcard.games.nolimitholdem.round import Action

DEFAULT_GAME_CONFIG = {
'game_num_players': 2,
'game_num_players': 4,
'chips_for_each': 100,
'dealer_id': None,
}
@@ -69,10 +69,10 @@ def _extract_state(self, state):
obs[52] = float(my_chips)
obs[53] = float(max(all_chips))
extracted_state['obs'] = obs

extracted_state['raw_obs'] = state
extracted_state['raw_legal_actions'] = [a for a in state['legal_actions']]
extracted_state['action_record'] = self.action_recorder
extracted_state['last_raise'] = state['last_raise']

return extracted_state

@@ -114,6 +114,7 @@ def get_perfect_information(self):
state['hand_cards'] = [[c.get_index() for c in self.game.players[i].hand] for i in range(self.num_players)]
state['current_player'] = self.game.game_pointer
state['legal_actions'] = self.game.get_legal_actions()
state['last_raise'] = self.game.round.last_raise
return state


7 changes: 4 additions & 3 deletions rlcard/games/nolimitholdem/game.py
@@ -113,7 +113,7 @@ def get_legal_actions(self):
"""
return self.round.get_nolimit_legal_actions(players=self.players)

def step(self, action):
def step(self, action_tp):
Member:
What is the motivation for using action_tp? What would the legal actions be with action_tp?

Author:
action_tp (or action_tuple) is [Action, amt: int]. The legal actions will only contain FOLD, CHECK_CALL, and RAISE. In the case of a raise, action_tp[1] indicates the amount to raise; in other cases, action_tp[1] is not used.
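
A small illustrative sketch of the tuple interface described above, using the Action names from this diff; only RAISE consumes the amount element.

from rlcard.games.nolimitholdem.round import Action

# Every action handed to Game.step is an (Action, amount) pair.
call_tp = (Action.CHECK_CALL, 0)   # the amount is ignored for CHECK_CALL and FOLD
raise_tp = (Action.RAISE, 20)      # raise by 20 chips

# Game.step unpacks the pair before validating the action, as in this PR.
action, amt = raise_tp
assert action in (Action.FOLD, Action.CHECK_CALL, Action.RAISE)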

"""
Get the next state

@@ -126,7 +126,7 @@ def step(self, action):
(dict): next player's state
(int): next player id
"""

action, amt = action_tp
if action not in self.get_legal_actions():
print(action, self.get_legal_actions())
print(self.get_state(self.game_pointer))
@@ -143,7 +143,7 @@ def step(self, action):
self.history.append((r, b, r_c, d, p, ps))

# Then we proceed to the next round
self.game_pointer = self.round.proceed_round(self.players, action)
self.game_pointer = self.round.proceed_round(self.players, action_tp)

players_in_bypass = [1 if player.status in (PlayerStatus.FOLDED, PlayerStatus.ALLIN) else 0 for player in self.players]
if self.num_players - sum(players_in_bypass) == 1:
@@ -206,6 +206,7 @@ def get_state(self, player_id):
state['current_player'] = self.game_pointer
state['pot'] = self.dealer.pot
state['stage'] = self.stage
state['last_raise'] = self.round.last_raise
return state

def step_back(self):
73 changes: 44 additions & 29 deletions rlcard/games/nolimitholdem/round.py
@@ -8,12 +8,13 @@
class Action(Enum):
FOLD = 0
CHECK_CALL = 1
#CALL = 2
RAISE = 2
# CALL = 2
# RAISE_3BB = 3
RAISE_HALF_POT = 2
RAISE_POT = 3
# RAISE_2POT = 5
ALL_IN = 4
# RAISE_HALF_POT = 2
# RAISE_POT = 3
# # RAISE_2POT = 5
# ALL_IN = 4
# SMALL_BLIND = 7
# BIG_BLIND = 8

@@ -45,6 +46,7 @@ def __init__(self, num_players, init_raise_amount, dealer, np_random):

# Raised amount for each player
self.raised = [0 for _ in range(self.num_players)]
self.last_raise = 0

def start_new_round(self, game_pointer, raised=None):
"""
@@ -58,12 +60,13 @@ def start_new_round(self, game_pointer, raised=None):
"""
self.game_pointer = game_pointer
self.not_raise_num = 0
self.last_raise = 0
if raised:
self.raised = raised
else:
self.raised = [0 for _ in range(self.num_players)]

def proceed_round(self, players, action):
def proceed_round(self, players, action_tp):
"""
Call functions from other classes to keep one round running

@@ -76,29 +79,38 @@
"""
player = players[self.game_pointer]

action = action_tp[0]
if action == Action.CHECK_CALL:
diff = max(self.raised) - self.raised[self.game_pointer]
self.raised[self.game_pointer] = max(self.raised)
player.bet(chips=diff)
self.not_raise_num += 1

elif action == Action.RAISE:
raise_amt = action_tp[1]
self.last_raise = raise_amt
self.raised[self.game_pointer] += raise_amt
player.bet(chips=raise_amt)
self.not_raise_num = 1

elif action == Action.ALL_IN:
all_in_quantity = player.remained_chips
self.raised[self.game_pointer] = all_in_quantity + self.raised[self.game_pointer]
player.bet(chips=all_in_quantity)

self.not_raise_num = 1
# elif action == Action.ALL_IN:
# all_in_quantity = player.remained_chips
# self.raised[self.game_pointer] = all_in_quantity + self.raised[self.game_pointer]
# player.bet(chips=all_in_quantity)

elif action == Action.RAISE_POT:
self.raised[self.game_pointer] += self.dealer.pot
player.bet(chips=self.dealer.pot)
self.not_raise_num = 1
# self.not_raise_num = 1

elif action == Action.RAISE_HALF_POT:
quantity = int(self.dealer.pot / 2)
self.raised[self.game_pointer] += quantity
player.bet(chips=quantity)
self.not_raise_num = 1
# elif action == Action.RAISE_POT:
# self.raised[self.game_pointer] += self.dealer.pot
# player.bet(chips=self.dealer.pot)
# self.not_raise_num = 1

# elif action == Action.RAISE_HALF_POT:
# quantity = int(self.dealer.pot / 2)
# self.raised[self.game_pointer] += quantity
# player.bet(chips=quantity)
# self.not_raise_num = 1

elif action == Action.FOLD:
player.status = PlayerStatus.FOLDED
@@ -142,22 +154,25 @@ def get_nolimit_legal_actions(self, players):
diff = max(self.raised) - self.raised[self.game_pointer]
# If the current player has no more chips after call, we cannot raise
if diff > 0 and diff >= player.remained_chips:
full_actions.remove(Action.RAISE_HALF_POT)
full_actions.remove(Action.RAISE_POT)
# full_actions.remove(Action.RAISE_HALF_POT)
# full_actions.remove(Action.RAISE_POT)
full_actions.remove(Action.ALL_IN)
full_actions.remove(Action.RAISE)
# Even if we can raise, we have to check remained chips
else:
if self.dealer.pot > player.remained_chips:
full_actions.remove(Action.RAISE_POT)
if player.remained_chips < self.last_raise:
full_actions.remove(Action.RAISE)
# if self.dealer.pot > player.remained_chips:
# full_actions.remove(Action.RAISE_POT)

if int(self.dealer.pot / 2) > player.remained_chips:
full_actions.remove(Action.RAISE_HALF_POT)
# if int(self.dealer.pot / 2) > player.remained_chips:
# full_actions.remove(Action.RAISE_HALF_POT)

# Can't raise if the total raise amount is leq than the max raise amount of this round
# If raise by pot, there is no such concern
if Action.RAISE_HALF_POT in full_actions and \
int(self.dealer.pot / 2) + self.raised[self.game_pointer] <= max(self.raised):
full_actions.remove(Action.RAISE_HALF_POT)
# if Action.RAISE_HALF_POT in full_actions and \
# int(self.dealer.pot / 2) + self.raised[self.game_pointer] <= max(self.raised):
# full_actions.remove(Action.RAISE_HALF_POT)

return full_actions

Binary file modified rlcard/models/pretrained/leduc_holdem_cfr/average_policy.pkl
Binary file not shown.
Binary file modified rlcard/models/pretrained/leduc_holdem_cfr/iteration.pkl
Binary file not shown.
Binary file modified rlcard/models/pretrained/leduc_holdem_cfr/policy.pkl
Binary file not shown.
Binary file modified rlcard/models/pretrained/leduc_holdem_cfr/regrets.pkl
Binary file not shown.