#include <stdio.h>
#include <string.h>
#include "mix.h"
// global variables
FILE *log_file, *values_file;
char GAME_TABLE[ROWS_NUM][COLS_NUM];
int current_table_state;
double sarsa_values[PLAYERS_NUMBER][MDP_STATES_NUMBER];
double alpha, epsilon;
int player_round = XPLAYER;
int human = EMPTY;
int winner;
int NUMBER_OF_RUNS = 500;
int run_ix;
//////////////////////////////////////
//////////// Init procedure //////////
//////////////////////////////////////
void InitGame(void) {
int r_ix, c_ix;
// init table
for (r_ix=0 ; r_ix<ROWS_NUM ; r_ix++) {
for (c_ix=0 ; c_ix<COLS_NUM ; c_ix++) {
GAME_TABLE[r_ix][c_ix] = 0;
}
}
current_table_state = 0;
// init winner
winner = FALSE;
} // end of InitGame
///////////////////////////////////////////////////////////////////////////
/////////// function IsMateExist return TRUE if found sequence _PPP_ /////
/////////// where P is PLAYER's symbol and _ is available slot /////
///////////////////////////////////////////////////////////////////////////
int IsMateExist(int player) {
int r_ix, c_ix;
for (r_ix=0 ; r_ix<ROWS_NUM ; r_ix++) {
for (c_ix=0 ; c_ix<COLS_NUM-WINNING_LENGTH ; c_ix++) {
if (GAME_TABLE[r_ix][c_ix+1] == player &&
GAME_TABLE[r_ix][c_ix+2] == player &&
GAME_TABLE[r_ix][c_ix+3] == player) {
if ( GAME_TABLE[r_ix][c_ix] == EMPTY &&
(r_ix == ROWS_NUM-1 || GAME_TABLE[r_ix+1][c_ix] != EMPTY)
&&
GAME_TABLE[r_ix][c_ix+WINNING_LENGTH] == EMPTY &&
(r_ix == ROWS_NUM-1 || GAME_TABLE[r_ix+1][c_ix+WINNING_LENGTH] != EMPTY) )
return TRUE;
}
}
}
return FALSE;
} // end of IsMateExist
/////////////////////////////////////////////////////////////////////////////////
/////// function GetEmptySlotOfCol return index of the available slot of COL ////
/////////////////////////////////////////////////////////////////////////////////
int GetEmptySlotOfCol(int col) {
int r_ix;
for (r_ix=ROWS_NUM-1 ; r_ix >=0 ; r_ix--) {
if ( GAME_TABLE[r_ix][col] == EMPTY) {
return r_ix;
}
}
return NOT_AVAILABLE_COL;
} // end of GetEmptySlotOfCol
///////////////////////////////////////////////////////////////////////////
/////////// function IsChessExist return TRUE if found sequence of /////
/////////// WINNING_LENGTH-1 PLAYER's symbol where the missing one is /////
/////////// empty. If POTENTIAL is FALSE then check also if the empty /////
/////////// slot is available. /////
///////////////////////////////////////////////////////////////////////////
int IsChessExist(int player, int potential) {
int r_ix, c_ix, w_ix;
int counter, empty_slot, seq;
// check horizontal sequence
for (r_ix=0 ; r_ix<ROWS_NUM ; r_ix++) {
for (c_ix=0 ; c_ix<COLS_NUM-WINNING_LENGTH+1 ; c_ix++) {
counter = 0;
empty_slot = -1;
for (w_ix=0 ; w_ix<WINNING_LENGTH ; w_ix++) {
if (GAME_TABLE[r_ix][c_ix+w_ix] == player) counter++;
else if (GAME_TABLE[r_ix][c_ix+w_ix] == EMPTY) empty_slot = c_ix+w_ix;
}
// check if all but one empty slot are of PLAYER's symbol
if (counter == WINNING_LENGTH-1 && empty_slot != -1) {
// check if not Mate already
if ( c_ix == 0 ||
c_ix+WINNING_LENGTH-1 == COLS_NUM ||
(GAME_TABLE[r_ix][c_ix-1] != EMPTY || GAME_TABLE[r_ix][c_ix+WINNING_LENGTH-1] != EMPTY) ) {
// check if potential or real
if (potential ||
(r_ix == ROWS_NUM-1 || GAME_TABLE[r_ix+1][empty_slot] != EMPTY))
return TRUE;
}
}
}
}
// check vertical sequence
if (! potential) {
for (c_ix=0 ; c_ix<COLS_NUM ; c_ix++) {
r_ix = GetEmptySlotOfCol(c_ix);
if (r_ix != NOT_AVAILABLE_COL) {
if (r_ix < ROWS_NUM-WINNING_LENGTH+1) {
seq = TRUE;
for (w_ix=1 ; w_ix<WINNING_LENGTH ; w_ix++) {
if (GAME_TABLE[r_ix+w_ix][c_ix] != player) {
seq = FALSE;
break;
}
}
if (seq) return TRUE;
}
}
}
}
return FALSE;
} // end of IsChessExist
///////////////////////////////////////////////////////////////////////////
/////////// function IsWinningSequenceExist return TRUE if PLAYER has /////
/////////// a winning sequence, FALSE otherwise. /////
///////////////////////////////////////////////////////////////////////////
int IsWinningSequenceExist(int player) {
int r_ix, c_ix, w_ix;
int seq;
// check vertical sequence
for (c_ix=0 ; c_ix<COLS_NUM ; c_ix++) {
for (r_ix=0 ; r_ix<ROWS_NUM-WINNING_LENGTH+1 ; r_ix++) {
seq = TRUE;
for (w_ix=0 ; w_ix<WINNING_LENGTH ; w_ix++) {
if (GAME_TABLE[r_ix+w_ix][c_ix] != player) {
seq = FALSE;
break;
}
}
if (seq) return TRUE;
}
}
// check horizontal sequence
for (r_ix=0 ; r_ix<ROWS_NUM ; r_ix++) {
for (c_ix=0 ; c_ix<COLS_NUM-WINNING_LENGTH+1 ; c_ix++) {
seq = TRUE;
for (w_ix=0 ; w_ix<WINNING_LENGTH ; w_ix++) {
if (GAME_TABLE[r_ix][c_ix+w_ix] != player) {
seq = FALSE;
break;
}
}
if (seq) return TRUE;
}
}
return FALSE;
} // end of IsWinningSequenceExist
//////////////////////////////////////////////////////////////////////////////
/////////// procedure GetTableState return order number of the state ////////
/////////// in which the game is according to GAME_TABLE ////////
//////////////////////////////////////////////////////////////////////////////
int GetTableState() {
int state;
if (IsWinningSequenceExist(XPLAYER)) return X_WINNING_STATE;
if (IsWinningSequenceExist(OPLAYER)) return O_WINNING_STATE;
// calculate first bit
state = (player_round == OPLAYER);
// calculate second bit
state = state + 2*(IsMateExist(XPLAYER));
// calculate third bit
state = state + 4*(IsMateExist(OPLAYER));
// calculate fourth bit
state = state + 8*(IsChessExist(XPLAYER, FALSE));
// calculate fifth bit
state = state + 16*(IsChessExist(OPLAYER, FALSE));
// calculate sixth bit
state = state + 32*(IsChessExist(XPLAYER, TRUE));
// calculate seventh bit
state = state + 64*(IsChessExist(OPLAYER, TRUE));
return state;
} // end of GetTableState
//////////////////////////////////////////////////////////////////////////////////
///////// procedure PrintSarsaValuesToLog print the sarsa values to FILE /////////
//////////////////////////////////////////////////////////////////////////////////
void PrintSarsaValuesToLog(FILE *file) {
int plr_ix, val_ix;
for (plr_ix=0 ; plr_ix<PLAYERS_NUMBER ; plr_ix++) {
for (val_ix=0 ; val_ix<MDP_STATES_NUMBER ; val_ix++) {
fprintf(file, "%2.3f ", sarsa_values[plr_ix][val_ix]);
if (val_ix%10 == 9) fprintf(file, "\n");
}
}
fprintf(file, "\n");
}
/////////////////////////////////////////////////////////////////////////////////////////
///////// procedure ReadSarsaValuesFromFile init sarsa values according to FILE /////////
/////////////////////////////////////////////////////////////////////////////////////////
void ReadSarsaValuesFromFile(FILE *file) {
int plr_ix, val_ix;
float val;
for (plr_ix=0 ; plr_ix<PLAYERS_NUMBER ; plr_ix++) {
for (val_ix=0 ; val_ix<MDP_STATES_NUMBER ; val_ix++) {
fscanf(values_file, "%f ", &val);
sarsa_values[plr_ix][val_ix] = val;
}
}
}
////////////////////////////////////////////////////////////////////////////////
///////// procedure PrintTable print GAME_TABLE to log file or screen /////////
////////////////////////////////////////////////////////////////////////////////
void PrintTable(int TO_LOG) {
int r_ix, c_ix;
char ch;
for (r_ix=0 ; r_ix<ROWS_NUM ; r_ix++) {
if (TO_LOG) fprintf(log_file, "|");
else printf("|");
for (c_ix=0 ; c_ix<COLS_NUM ; c_ix++) {
if (GAME_TABLE[r_ix][c_ix] == EMPTY) ch = ' ';
else if (GAME_TABLE[r_ix][c_ix] == XPLAYER) ch = 'X';
else ch = 'O';
if (TO_LOG) fprintf(log_file, " %c ", ch);
else printf(" %c ", ch);
}
if (TO_LOG) fprintf(log_file, "|\n");
else printf("|\n");
}
if (TO_LOG) fprintf(log_file, "|");
else printf("|");
for (c_ix=0 ; c_ix<COLS_NUM ; c_ix++) {
if (TO_LOG) fprintf(log_file, "---");
else printf("---");
}
if (TO_LOG) fprintf(log_file, "|\n");
else printf("|\n");
if (TO_LOG) fprintf(log_file, "|");
else printf("|");
for (c_ix=0 ; c_ix<COLS_NUM ; c_ix++) {
if (TO_LOG) fprintf(log_file, "-%d-", (c_ix+1)%10);
else printf("-%d-", (c_ix+1)%10);
}
if (TO_LOG) fprintf(log_file, "|\n");
else printf("|\n");
// if (TO_LOG) fprintf(log_file, "STATE = %d, TURN = %d \n", GetTableState(GAME_TABLE), player_round);
printf("STATE = %d, TURN = %d \n", current_table_state, player_round);
if (TO_LOG) PrintSarsaValuesToLog(log_file);
} // end of PrintTable
//////////////////////////////////////////////////////////////////////////
/////////// procedure UpdateSarsaValues update PREV_STATE value /////////
/////////// according to current state /////////
//////////////////////////////////////////////////////////////////////////
void UpdateSarsaValues(int prev_state) {
sarsa_values[XPLAYER-1][prev_state] = (double) ((1-alpha) * sarsa_values[XPLAYER-1][prev_state] +
alpha * sarsa_values[XPLAYER-1][current_table_state]);
sarsa_values[OPLAYER-1][prev_state] = (double) ((1-alpha) * sarsa_values[OPLAYER-1][prev_state] +
alpha * sarsa_values[OPLAYER-1][current_table_state]);
} // end of UpdateSarsaValues
/////////////////////////////////////////////////////////////////////////
/////////// Procedure ChangeTurn set round to be of other player ////////
/////////////////////////////////////////////////////////////////////////
void ChangeTurn(void) {
if (player_round == XPLAYER) player_round = OPLAYER;
else player_round = XPLAYER;
}
//////////////////////////////////////////////////////////////////////////////////
/////////// procedure InsertToTable put PLAYERS's symbol in COL, ///////////
/////////// change turn and update sarsa values ///////////
//////////////////////////////////////////////////////////////////////////////////
void InsertToTable(int col) {
int row = GetEmptySlotOfCol(col);
int prev_state = current_table_state;
if (row==NOT_AVAILABLE_COL) {
printf("NOT LEGAL PLAY ! (%d,%d)\n", row, col);
exit();
}
GAME_TABLE[row][col] = player_round;
ChangeTurn();
current_table_state = GetTableState();
UpdateSarsaValues(prev_state);
} // end of InsertToTable
//////////////////////////////////////////////////////////////////////
/////////// procedure RemoveFromTable remove one player's ///////////
/////////// symbol from COL. ///////////
//////////////////////////////////////////////////////////////////////
void RemoveFromTable(int col) {
int r_ix;
ChangeTurn();
for (r_ix=0 ; r_ix<ROWS_NUM ; r_ix++) {
if (GAME_TABLE[r_ix][col] != EMPTY) {
GAME_TABLE[r_ix][col] = EMPTY;
break;
}
}
current_table_state = GetTableState();
} // end of RemoveFromTable
///////////////////////////////////////////////////////////////////////////
/////////// Procedure CalculateMax return the index of the biggest ////////
/////////// value in VAL_ARRAY. ////////
///////////////////////////////////////////////////////////////////////////
int CalculateMax(double val_array[COLS_NUM]) {
int ix;
int max_ix;
int multiple_max = 0;
double max_val;
max_val = val_array[0];
max_ix = 0;
for (ix=1 ; ix <COLS_NUM ; ix++) {
if (max_val < val_array[ix]) {
max_val = val_array[ix];
max_ix = ix;
multiple_max = 0;
}
else if (max_val == val_array[ix]) {
multiple_max = -1; // indicate more than one maximum
}
}
if (multiple_max == -1) { // get one random max val
do
max_ix = rand()%COLS_NUM;
while (val_array[max_ix] != max_val);
}
if (human != EMPTY) printf ("I put mine in col %d. \n", max_ix+1);
return max_ix;
} // end of CalculateMax
////////////////////////////////////////////////////////////////////////////
/////////// Procedure CalculateMin return the index of the smallest ////////
/////////// value in VAL_ARRAY. ////////
////////////////////////////////////////////////////////////////////////////
int CalculateMin(double val_array[COLS_NUM]) {
int ix;
int min_ix;
int multiple_min = 0;
double min_val;
min_val = val_array[0];
min_ix = 0;
for (ix=1 ; ix <COLS_NUM ; ix++) {
if (min_val > val_array[ix]) {
min_val = val_array[ix];
min_ix = ix;
multiple_min = 0;
}
else if (min_val == val_array[ix]) {
multiple_min = -1; // indicate more than one maximum
}
}
if (multiple_min == -1) { // get one random min val
do
min_ix = rand()%COLS_NUM;
while (val_array[min_ix] != min_val);
}
//if (human != EMPTY) printf ("--min- %d(%2.3f) --- \n", min_ix+1, val_array[min_ix]);
//fprintf (log_file, "--min- %d(%2.3f) --- \n", min_ix+1, val_array[min_ix]);
return min_ix;
} // end of CalculateMin
/////////////////////////////////////////////////////////////////////
/////////// function EndOfGame return FALSE if game is not over, ////
/////////// winner code otherwise ////
/////////////////////////////////////////////////////////////////////
int EndOfGame(void) {
int c_ix;
int full_table = TRUE;
// check existence of unoccupied place
for (c_ix=0 ; c_ix<COLS_NUM ; c_ix++) {
if (GAME_TABLE[0][c_ix] == EMPTY) {
full_table = FALSE;
break;
}
}
if (full_table) return DRAW;
if (IsWinningSequenceExist(XPLAYER)) return XPLAYER;
if (IsWinningSequenceExist(OPLAYER)) return OPLAYER;
return FALSE;
} // end of EndOfGame
////////////////////////////////////////////////////////////////////////////////////
/////////// procedure CalculateFutureValues calculates future sarsa values /////////
/////////// for any avaliable column for the next two plays /////////
////////////////////////////////////////////////////////////////////////////////////
void CalculateFutureValues(double future_values[COLS_NUM][COLS_NUM]) {
int col_ix1, col_ix2;
int acting_player = player_round-1;
int is_draw;
for (col_ix1=0 ; col_ix1<COLS_NUM ; col_ix1++) {
if (GAME_TABLE[0][col_ix1] != EMPTY)
// not available col
for (col_ix2=0 ; col_ix2<COLS_NUM ; col_ix2++)
future_values[col_ix1][col_ix2] = (double) 2;
else {
// available col
// simulate player play
InsertToTable(col_ix1);
// simulate other play
is_draw = (EndOfGame() == DRAW);
for (col_ix2=0 ; col_ix2<COLS_NUM ; col_ix2++) {
if (current_table_state == X_WINNING_STATE || current_table_state == O_WINNING_STATE) {
future_values[col_ix1][col_ix2] = sarsa_values[acting_player][current_table_state];
}
else {
if (is_draw) future_values[col_ix1][col_ix2] = 1.5;
else {
if (GAME_TABLE[0][col_ix2] != EMPTY)
// not available col
future_values[col_ix1][col_ix2] = (double) 2;
else {
// available col
InsertToTable(col_ix2);
future_values[col_ix1][col_ix2] = sarsa_values[acting_player][current_table_state];
RemoveFromTable(col_ix2);
}
}
}
}
RemoveFromTable(col_ix1);
}
}
} // end of CalculateFutureValues
/////////////////////////////////////////////////////////////////////////////////
/////////// Procedure GetPlay choose action for player according to eps/////
/////////// if exploration then randomiclly if exploitation then accordng /////
/////////// to sarsa values. /////
/////////////////////////////////////////////////////////////////////////////////
void GetPlay() {
int col;
int rnd;
int col_ix, col_ix1;
double future_future_values[COLS_NUM][COLS_NUM];
double future_values[COLS_NUM];
rnd = rand()%100;
if ( (100 * epsilon) < rnd) {
if (human != EMPTY) printf("explore...\n");
// exploration - get random free col
do
col = rand()%COLS_NUM;
while (GAME_TABLE[0][col] != EMPTY);
}
else {
// exploitation - compute future values for any avaliable column
CalculateFutureValues(future_future_values);
for (col_ix=0 ; col_ix<COLS_NUM ; col_ix++) {
future_values[col_ix] = future_future_values[col_ix][CalculateMin(future_future_values[col_ix])];
if (future_values[col_ix] == 2) future_values[col_ix] = -2; // anavilable col
}
/* fprintf(log_file, "+++++++++++++++++++++++++++++++ \n"); */
/* for (col_ix=0 ; col_ix<COLS_NUM ; col_ix++) { */
/* for (col_ix1=0 ; col_ix1<COLS_NUM ; col_ix1++) { */
/* fprintf(log_file, " %2.3f", future_future_values[col_ix][col_ix1]); */
/* } */
/* fprintf(log_file, " \n"); */
/* } */
/* fprintf(log_file, "------------------------------- \n"); */
/* for (col_ix=0 ; col_ix<COLS_NUM ; col_ix++) { */
/* fprintf(log_file, " %2.3f", future_values[col_ix]); */
/* } */
/* fprintf(log_file, " \n"); */
/* fprintf(log_file, "+++++++++++++++++++++++++++++++ \n"); */
col = CalculateMax(future_values);
}
InsertToTable(col);
} // end of GetPlay
///////////////////////////////////////////////////
/////////////// main procedure ////////////////////
///////////////////////////////////////////////////
void main(int argc, char *argv[]) {
time_t dummy; // for random
char learning_need = TRUE;
char selection;
char str_selection[10];
int col_selection;
int plr_ix, val_ix;
int col;
// init game
InitGame();
// for random
srand(time(&dummy));
// sarsa value initialization
if ((values_file = fopen("mix.init", "r")) == NULL) {
printf("can't open file %s - will perform %d learning games. \n", "mix.init", NUMBER_OF_RUNS);
for (plr_ix=0 ; plr_ix<PLAYERS_NUMBER ; plr_ix++) {
for (val_ix=0 ; val_ix<MDP_STATES_NUMBER ; val_ix++) {
sarsa_values[plr_ix][val_ix] = (double) 0;
}
}
sarsa_values[XPLAYER-1][X_WINNING_STATE] = (double) 1;
sarsa_values[XPLAYER-1][O_WINNING_STATE] = (double) -1;
sarsa_values[OPLAYER-1][X_WINNING_STATE] = (double) -1;
sarsa_values[OPLAYER-1][O_WINNING_STATE] = (double) 1;
}
else {
printf("Init sarsa values from file %s. \n", "mix.init");
ReadSarsaValuesFromFile(values_file);
fclose(values_file);
learning_need = FALSE;
epsilon = 1; // exploit always
}
if (learning_need) {
// open log file
if ((log_file = fopen(LOG_FILE_NAME, "w")) == NULL) {
printf("can't open file %s! \n", LOG_FILE_NAME);
return;
}
// computer vs computer
for (run_ix=1 ; run_ix<=NUMBER_OF_RUNS ; run_ix++) {
alpha = (double) 1/(run_ix/(NUMBER_OF_RUNS/10)+1);
epsilon = (double) run_ix/NUMBER_OF_RUNS;
fprintf(log_file, "--------- RUN NUMBER %d ---------\n", run_ix);
printf("--------- RUN NUMBER %d (alpha %2.3f epsilon %2.3f) \n", run_ix, alpha, epsilon);
while (! winner) {
GetPlay();
winner = EndOfGame();
}
fprintf(log_file, " winner is player %d (state %d) \n", winner, current_table_state);
PrintSarsaValuesToLog(log_file);
InitGame();
}
} // learning need
// human vs computer
while (selection != 'q') {
InitGame();
printf("You may choose your symbol [x|o] (or [q]uit) : ");
while (selection != 'q' && selection != 'x' && selection != 'o') {
selection = getchar();
}
switch(selection) {
case 'q' :
break;
case 'x' :
human = XPLAYER;
break;
case 'o' :
human = OPLAYER;
break;
default :
printf(" > %c Unknown command. \n", selection);
}
if (selection != 'q') {
selection = ' ';
player_round = XPLAYER;
while (! winner) {
if (player_round != human) {
GetPlay();
PrintTable(FALSE);
}
else {
printf("You put your's in col : ");
col_selection = 0;
while (col_selection < 1 || col_selection > COLS_NUM) {
gets(str_selection);
col_selection = atoi(str_selection);
}
InsertToTable(col_selection-1);
}
winner = EndOfGame();
}
printf(" winner is player %d. \n\n", winner);
}
} // end of while
// close log file
fclose(log_file);
// save sarsa values
if ((values_file = fopen("sarsa_values.out", "w")) == NULL) {
printf("can't open file %s! \n", "sarsa_values.out");
}
else PrintSarsaValuesToLog(values_file);
fclose(values_file);
}