K리그 축구 승부 예측 프로그램 (Python 부분)
예전에 진행했던 https://sno-machinelearning.tistory.com/52?category=858218 의 Python 코드입니다.
다른 프로그램 코드는 제가 작성한 것이 아니라서 올리지 않겠습니다.
밑에 있는 코드는 여태까지(그때 기준으로요...) 있었던 경기의 결과로 각 팀들이 홈팀과 원정팀을 갔을 때 어떤 요인이 골에 영향이 가는지 정하고 새로운 홈팀과 원정팀에 대한 득점수를 예측하여 경기를 누가 이길 확률이 높은지 계산하는 코드입니다.
if a==1:
# 필요 패키지 다운로드
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import matplotlib.ticker as plticker
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import os
import tensorflow as tf
import statsmodels.formula.api as sm
from patsy import dmatrices
import pymysql #pip install PyMySQL
# 2019년 제외 DB 불러오기
conn = pymysql.connect(host='125.176.85.154',port=3380, user='root', password='1234',
db='server', charset='utf8')
curs = conn.cursor()
sql = "select * from not2019"
curs.execute(sql)
rows = curs.fetchall()
# Connection 닫기
conn.close()
# df로 변경
not2019 = pd.DataFrame(list(rows),columns=['K_NUMBER','K_DAY','HOME','AWAY',\
'HOME_GOAL','AWAY_GOAL','HOME_SHOT','AWAY_SHOT',\
'HOME_TSHOT','AWAY_TSHOT','HOME_PASHOT',\
'AWAY_PASHOT','HOME_PATSHOT','AWAY_PATSHOT',\
'HOME_DRB','AWAY_DRB','HOME_FSIN','AWAY_FSIN',\
'HOME_PASS','AWAY_PASS','HOME_KPASS','AWAY_KPASS'\
,'HOME_FPASS','AWAY_FPASS','HOME_BPASS',\
'AWAY_BPASS','HOME_LPASS','AWAY_LPASS',\
'HOME_CROSS','AWAY_CROSS','HOME_PASSSU',\
'AWAY_PASSSU','HOME_BATGRD','AWAY_BATGRD',\
'HOME_BATAIR','AWAY_BATAIR','HOME_TACK',\
'AWAY_TACK','HOME_INTERSEP','AWAY_INTERSEP',\
'HOME_FOUL','AWAY_FOUL','HOME_YLLCARD',\
'AWAY_YLLCARD','HOME_REDCARD','AWAY_REDCARD'])
# 필요없는 변수 제거 제거
not2019.drop('K_NUMBER',axis=1, inplace = True)
not2019.drop('K_DAY',axis=1, inplace = True)
not2019.drop('HOME_GOAL',axis=1, inplace = True)
not2019.drop('AWAY_GOAL',axis=1, inplace = True)
# 데이터 형식 변경
not2019.HOME_SHOT=pd.to_numeric(not2019.HOME_SHOT)
not2019.AWAY_SHOT=pd.to_numeric(not2019.AWAY_SHOT)
not2019.HOME_TSHOT=pd.to_numeric(not2019.HOME_TSHOT)
not2019.AWAY_TSHOT=pd.to_numeric(not2019.AWAY_TSHOT)
not2019.HOME_PASHOT=pd.to_numeric(not2019.HOME_PASHOT)
not2019.AWAY_PASHOT=pd.to_numeric(not2019.AWAY_PASHOT)
not2019.HOME_PATSHOT=pd.to_numeric(not2019.HOME_PATSHOT)
not2019.AWAY_PATSHOT=pd.to_numeric(not2019.AWAY_PATSHOT)
not2019.HOME_DRB=pd.to_numeric(not2019.HOME_DRB)
not2019.AWAY_DRB=pd.to_numeric(not2019.AWAY_DRB)
not2019.HOME_FSIN=pd.to_numeric(not2019.HOME_FSIN)
not2019.AWAY_FSIN=pd.to_numeric(not2019.AWAY_FSIN)
not2019.HOME_PASS=pd.to_numeric(not2019.HOME_PASS)
not2019.AWAY_PASS=pd.to_numeric(not2019.AWAY_PASS)
not2019.HOME_KPASS=pd.to_numeric(not2019.HOME_KPASS)
not2019.AWAY_KPASS=pd.to_numeric(not2019.AWAY_KPASS)
not2019.HOME_FPASS=pd.to_numeric(not2019.HOME_FPASS)
not2019.AWAY_FPASS=pd.to_numeric(not2019.AWAY_FPASS)
not2019.HOME_BPASS=pd.to_numeric(not2019.HOME_BPASS)
not2019.AWAY_BPASS=pd.to_numeric(not2019.AWAY_BPASS)
not2019.HOME_LPASS=pd.to_numeric(not2019.HOME_LPASS)
not2019.AWAY_LPASS=pd.to_numeric(not2019.AWAY_LPASS)
not2019.HOME_CROSS=pd.to_numeric(not2019.HOME_CROSS)
not2019.AWAY_CROSS=pd.to_numeric(not2019.AWAY_CROSS)
not2019.HOME_PASSSU=pd.to_numeric(not2019.HOME_PASSSU)
not2019.AWAY_PASSSU=pd.to_numeric(not2019.AWAY_PASSSU)
not2019.HOME_BATGRD=pd.to_numeric(not2019.HOME_BATGRD)
not2019.AWAY_BATGRD=pd.to_numeric(not2019.AWAY_BATGRD)
not2019.HOME_BATAIR=pd.to_numeric(not2019.HOME_BATAIR)
not2019.AWAY_BATAIR=pd.to_numeric(not2019.AWAY_BATAIR)
not2019.HOME_TACK=pd.to_numeric(not2019.HOME_TACK)
not2019.AWAY_TACK=pd.to_numeric(not2019.AWAY_TACK)
not2019.HOME_INTERSEP=pd.to_numeric(not2019.HOME_INTERSEP)
not2019.AWAY_INTERSEP=pd.to_numeric(not2019.AWAY_INTERSEP)
not2019.HOME_FOUL=pd.to_numeric(not2019.HOME_FOUL)
not2019.AWAY_FOUL=pd.to_numeric(not2019.AWAY_FOUL)
not2019.HOME_YLLCARD=pd.to_numeric(not2019.HOME_YLLCARD)
not2019.AWAY_YLLCARD=pd.to_numeric(not2019.AWAY_YLLCARD)
not2019.HOME_REDCARD=pd.to_numeric(not2019.HOME_REDCARD)
not2019.AWAY_REDCARD=pd.to_numeric(not2019.AWAY_REDCARD)
## 홈 PA 유효슈팅 구하기
## 다중 선형 회귀 분석 (Multiple regression)
# 홈 PA샷의 상관계수표
corr = not2019.corr(method = 'pearson')
# 홈 PATSHOT의 선형회귀표 (모든 변수들을 넣은 상태에서 변수선택법 중 후진제거법을 사용)
model = sm.ols(formula = 'HOME_PATSHOT ~ HOME_SHOT + HOME_TSHOT + HOME_PASHOT + HOME_DRB + HOME_KPASS',data=not2019).fit()
#model.summary()
# 선택된 변수들을 메트릭스 표로 변경
multi_y = np.array(not2019.HOME_PATSHOT)
df_x = not2019[["HOME_SHOT", "HOME_TSHOT", "HOME_PASHOT", "HOME_DRB" , "HOME_KPASS"]]
multi_x = df_x.as_matrix()
# 모델 틀 설계
with tf.Graph().as_default() as multi_regresstion:
num_x = 5
X = tf.placeholder(tf.float32, [None, num_x], name='X')
Y = tf.placeholder(tf.float32, [None], name='Y')
lr = tf.constant(1e-3,tf.float32)
W = tf.get_variable("W", [1,num_x], tf.float32)
b = tf.get_variable("b",dtype=tf.float32,initializer=tf.constant(1.,tf.float32))
h = tf.matmul(W,X,transpose_b=True) + b
cost = tf.reduce_mean(tf.square(tf.subtract(h,Y)))
train = tf.train.GradientDescentOptimizer(lr).minimize(cost)
# 모델 틀에 메트릭스를 넣음
with tf.Session(graph=multi_regresstion) as sess:
sess.run(tf.global_variables_initializer())
for i in range(3000):
_,l = sess.run([train,cost],feed_dict={X:multi_x,Y:multi_y})
W_multi, b_multi = sess.run([W,b])
# 계수 구하기
for j in range(len(multi_x)):
sum = 0
for i in range(5):
sum += W_multi[0][i] * multi_x[j,i]
# 홈 PA유효슈팅 계수 저장
HOME_W_multi,HOME_b_multi = W_multi,b_multi
## 어웨이 PA 유효슈팅 구하기
# 어웨이 선형회귀표(변수선택법 = 후진제거법)
Away_model = sm.ols(formula = 'AWAY_PATSHOT ~ AWAY_SHOT + AWAY_TSHOT + AWAY_PASHOT + AWAY_DRB + AWAY_KPASS',data=not2019).fit()
multi_y = np.array(not2019.AWAY_PATSHOT)
df_x = not2019[["AWAY_SHOT", "AWAY_TSHOT", "AWAY_PASHOT", "AWAY_DRB" , "AWAY_KPASS"]]
multi_x = df_x.as_matrix()
with tf.Graph().as_default() as multi_regresstion:
num_x = 5
X = tf.placeholder(tf.float32, [None, num_x], name='X')
Y = tf.placeholder(tf.float32, [None], name='Y')
lr = tf.constant(1e-3,tf.float32)
W = tf.get_variable("W", [1,num_x], tf.float32)
b = tf.get_variable("b",dtype=tf.float32,initializer=tf.constant(1.,tf.float32))
h = tf.matmul(W,X,transpose_b=True) + b
cost = tf.reduce_mean(tf.square(tf.subtract(h,Y)))
train = tf.train.GradientDescentOptimizer(lr).minimize(cost)
with tf.Session(graph=multi_regresstion) as sess:
sess.run(tf.global_variables_initializer())
for i in range(3000):
_,l = sess.run([train,cost],feed_dict={X:multi_x,Y:multi_y})
W_multi, b_multi = sess.run([W,b])
for j in range(len(multi_x)):
sum = 0
for i in range(5):
sum += W_multi[0][i] * multi_x[j,i]
# 어웨이 PA유효슈팅 계수
AWAY_W_multi,AWAY_b_multi = W_multi,b_multi
## 새로운 데이터 예측
conn = pymysql.connect(host='125.176.85.154',port=3380, user='root', password='1234',
db='server', charset='utf8')
curs = conn.cursor()
sql = "select * from rawdb"
curs.execute(sql)
rowdata = curs.fetchall()
if rowdata != ():
sql = "DELETE FROM rawdb WHERE K_NUMBER = '%s'" % '0'
curs.execute(sql)
conn.commit()
# Connection 닫기
conn.close()
try:
if rowdata != ():
conn = pymysql.connect(host='125.176.85.154',port=3380, user='root', password='1234',
db='server', charset='utf8')
curs = conn.cursor()
sql = "select * from kleague" # 경기 번호불러오기 위한 데이터
curs.execute(sql)
raw = curs.fetchall()
num = (int(raw[-1][0]))+1
except:
rowdata = ()
# rowdf 만들기
try:
if rowdata != ():
rowdf1 = pd.DataFrame(list(rowdata),columns=['K_NUMBER','K_DAY','HOME','AWAY',\
'HOME_GOAL','AWAY_GOAL','HOME_SHOT','AWAY_SHOT',\
'HOME_TSHOT','AWAY_TSHOT','HOME_PASHOT',\
'AWAY_PASHOT','HOME_PATSHOT','AWAY_PATSHOT',\
'HOME_DRB','AWAY_DRB','HOME_FSIN','AWAY_FSIN',\
'HOME_PASS','AWAY_PASS','HOME_KPASS','AWAY_KPASS'\
,'HOME_FPASS','AWAY_FPASS','HOME_BPASS',\
'AWAY_BPASS','HOME_LPASS','AWAY_LPASS',\
'HOME_CROSS','AWAY_CROSS','HOME_PASSSU',\
'AWAY_PASSSU','HOME_BATGRD','AWAY_BATGRD',\
'HOME_BATAIR','AWAY_BATAIR','HOME_TACK',\
'AWAY_TACK','HOME_INTERSEP','AWAY_INTERSEP',\
'HOME_FOUL','AWAY_FOUL','HOME_YLLCARD',\
'AWAY_YLLCARD','HOME_REDCARD','AWAY_REDCARD'])
rowdf = rowdf1[0:1]
except:
rowdata = ()
try:
if rowdata != ():
rowdf['K_NUMBER'] = num
rowdf['K_DAY'] = rowdf['K_DAY'].astype(object)
rowdf['HOME'] = rowdf['HOME'].astype(object)
rowdf['AWAY'] = rowdf['AWAY'].astype(object)
rowdf.HOME_GOAL=pd.to_numeric(rowdf.HOME_GOAL)
rowdf.AWAY_GOAL=pd.to_numeric(rowdf.AWAY_GOAL)
rowdf.HOME_SHOT=pd.to_numeric(rowdf.HOME_SHOT)
rowdf.AWAY_SHOT=pd.to_numeric(rowdf.AWAY_SHOT)
rowdf.HOME_TSHOT=pd.to_numeric(rowdf.HOME_TSHOT)
rowdf.AWAY_TSHOT=pd.to_numeric(rowdf.AWAY_TSHOT)
rowdf.HOME_PASHOT=pd.to_numeric(rowdf.HOME_PASHOT)
rowdf.AWAY_PASHOT=pd.to_numeric(rowdf.AWAY_PASHOT)
rowdf.HOME_PATSHOT=pd.to_numeric(rowdf.HOME_PATSHOT)
rowdf.AWAY_PATSHOT=pd.to_numeric(rowdf.AWAY_PATSHOT)
rowdf.HOME_DRB=pd.to_numeric(rowdf.HOME_DRB)
rowdf.AWAY_DRB=pd.to_numeric(rowdf.AWAY_DRB)
rowdf.HOME_FSIN=pd.to_numeric(rowdf.HOME_FSIN)
rowdf.AWAY_FSIN=pd.to_numeric(rowdf.AWAY_FSIN)
rowdf.HOME_PASS=pd.to_numeric(rowdf.HOME_PASS)
rowdf.AWAY_PASS=pd.to_numeric(rowdf.AWAY_PASS)
rowdf.HOME_KPASS=pd.to_numeric(rowdf.HOME_KPASS)
rowdf.AWAY_KPASS=pd.to_numeric(rowdf.AWAY_KPASS)
rowdf.HOME_FPASS=pd.to_numeric(rowdf.HOME_FPASS)
rowdf.AWAY_FPASS=pd.to_numeric(rowdf.AWAY_FPASS)
rowdf.HOME_BPASS=pd.to_numeric(rowdf.HOME_BPASS)
rowdf.AWAY_BPASS=pd.to_numeric(rowdf.AWAY_BPASS)
rowdf.HOME_LPASS=pd.to_numeric(rowdf.HOME_LPASS)
rowdf.AWAY_LPASS=pd.to_numeric(rowdf.AWAY_LPASS)
rowdf.HOME_CROSS=pd.to_numeric(rowdf.HOME_CROSS)
rowdf.AWAY_CROSS=pd.to_numeric(rowdf.AWAY_CROSS)
rowdf.HOME_PASSSU=pd.to_numeric(rowdf.HOME_PASSSU)
rowdf.AWAY_PASSSU=pd.to_numeric(rowdf.AWAY_PASSSU)
rowdf.HOME_BATGRD=pd.to_numeric(rowdf.HOME_BATGRD)
rowdf.AWAY_BATGRD=pd.to_numeric(rowdf.AWAY_BATGRD)
rowdf.HOME_BATAIR=pd.to_numeric(rowdf.HOME_BATAIR)
rowdf.AWAY_BATAIR=pd.to_numeric(rowdf.AWAY_BATAIR)
rowdf.HOME_TACK=pd.to_numeric(rowdf.HOME_TACK)
rowdf.AWAY_TACK=pd.to_numeric(rowdf.AWAY_TACK)
rowdf.HOME_INTERSEP=pd.to_numeric(rowdf.HOME_INTERSEP)
rowdf.AWAY_INTERSEP=pd.to_numeric(rowdf.AWAY_INTERSEP)
rowdf.HOME_FOUL=pd.to_numeric(rowdf.HOME_FOUL)
rowdf.AWAY_FOUL=pd.to_numeric(rowdf.AWAY_FOUL)
rowdf.HOME_YLLCARD=pd.to_numeric(rowdf.HOME_YLLCARD)
rowdf.AWAY_YLLCARD=pd.to_numeric(rowdf.AWAY_YLLCARD)
rowdf.HOME_REDCARD=pd.to_numeric(rowdf.HOME_REDCARD)
rowdf.AWAY_REDCARD=pd.to_numeric(rowdf.AWAY_REDCARD)
except:
rowdata = ()
if rowdata != ():
rowdf.HOME_PATSHOT = HOME_W_multi[0][0]*rowdf.HOME_SHOT+HOME_W_multi[0][1]*rowdf.HOME_TSHOT+\
HOME_W_multi[0][2]*rowdf.HOME_PASHOT+HOME_W_multi[0][3]*rowdf.HOME_DRB+HOME_W_multi[0][4]*\
rowdf.HOME_KPASS+HOME_b_multi
rowdf["HOME_PATSHOT"] = rowdf["HOME_PATSHOT"].astype(int)
rowdf.AWAY_PATSHOT = AWAY_W_multi[0][0]*rowdf.AWAY_SHOT+AWAY_W_multi[0][1]*rowdf.AWAY_TSHOT+\
AWAY_W_multi[0][2]*rowdf.AWAY_PASHOT+AWAY_W_multi[0][3]*rowdf.AWAY_DRB+AWAY_W_multi[0][4]*\
rowdf.AWAY_KPASS+AWAY_b_multi
rowdf["AWAY_PATSHOT"] = rowdf["AWAY_PATSHOT"].astype(int)
for i in range(len(rowdf.HOME_PATSHOT)):
if rowdf.HOME_PATSHOT[i]>min(rowdf.HOME_TSHOT[i],rowdf.HOME_PASHOT[i]):
rowdf.HOME_PATSHOT[i]=min(rowdf.HOME_TSHOT[i],rowdf.HOME_PASHOT[i])
for i in range(len(rowdf.AWAY_PATSHOT)):
if rowdf.AWAY_PATSHOT[i]>min(rowdf.AWAY_TSHOT[i],rowdf.AWAY_PASHOT[i]):
rowdf.AWAY_PATSHOT[i]=min(rowdf.AWAY_TSHOT[i],rowdf.AWAY_PASHOT[i])
datarow = pd.DataFrame(rowdf)
if rowdata != ():
rowdf['K_NUMBER'] = rowdf['K_NUMBER'].astype(str)
rowdf['K_DAY'] = rowdf['K_DAY'].astype(str)
rowdf['HOME'] = rowdf['HOME'].astype(str)
rowdf['AWAY'] = rowdf['AWAY'].astype(str)
rowdf['HOME_GOAL'] = rowdf['HOME_GOAL'].astype(str)
rowdf['AWAY_GOAL'] = rowdf['AWAY_GOAL'].astype(str)
rowdf['HOME_SHOT'] = rowdf['HOME_SHOT'].astype(str)
rowdf['AWAY_SHOT'] = rowdf['AWAY_SHOT'].astype(str)
rowdf['HOME_TSHOT'] = rowdf['HOME_TSHOT'].astype(str)
rowdf['AWAY_TSHOT'] = rowdf['AWAY_TSHOT'].astype(str)
rowdf['HOME_PASHOT'] = rowdf['HOME_PASHOT'].astype(str)
rowdf['AWAY_PASHOT'] = rowdf['AWAY_PASHOT'].astype(str)
rowdf['HOME_PATSHOT'] = rowdf['HOME_PATSHOT'].astype(str)
rowdf['AWAY_PATSHOT'] = rowdf['AWAY_PATSHOT'].astype(str)
rowdf['HOME_DRB'] = rowdf['HOME_DRB'].astype(str)
rowdf['AWAY_DRB'] = rowdf['AWAY_DRB'].astype(str)
rowdf['HOME_FSIN'] = rowdf['HOME_FSIN'].astype(str)
rowdf['AWAY_FSIN'] = rowdf['AWAY_FSIN'].astype(str)
rowdf['HOME_PASS'] = rowdf['HOME_PASS'].astype(str)
rowdf['AWAY_PASS'] = rowdf['AWAY_PASS'].astype(str)
rowdf['HOME_KPASS'] = rowdf['HOME_KPASS'].astype(str)
rowdf['AWAY_KPASS'] = rowdf['AWAY_KPASS'].astype(str)
rowdf['HOME_FPASS'] = rowdf['HOME_FPASS'].astype(str)
rowdf['AWAY_FPASS'] = rowdf['AWAY_FPASS'].astype(str)
rowdf['HOME_BPASS'] = rowdf['HOME_BPASS'].astype(str)
rowdf['AWAY_BPASS'] = rowdf['AWAY_BPASS'].astype(str)
rowdf['HOME_LPASS'] = rowdf['HOME_LPASS'].astype(str)
rowdf['AWAY_LPASS'] = rowdf['AWAY_LPASS'].astype(str)
rowdf['HOME_CROSS'] = rowdf['HOME_CROSS'].astype(str)
rowdf['AWAY_CROSS'] = rowdf['AWAY_CROSS'].astype(str)
rowdf['HOME_PASSSU'] = rowdf['HOME_PASSSU'].astype(str)
rowdf['AWAY_PASSSU'] = rowdf['AWAY_PASSSU'].astype(str)
rowdf['HOME_BATGRD'] = rowdf['HOME_BATGRD'].astype(str)
rowdf['AWAY_BATGRD'] = rowdf['AWAY_BATGRD'].astype(str)
rowdf['HOME_BATAIR'] = rowdf['HOME_BATAIR'].astype(str)
rowdf['AWAY_BATAIR'] = rowdf['AWAY_BATAIR'].astype(str)
rowdf['HOME_TACK'] = rowdf['HOME_TACK'].astype(str)
rowdf['AWAY_TACK'] = rowdf['AWAY_TACK'].astype(str)
rowdf['HOME_INTERSEP'] = rowdf['HOME_INTERSEP'].astype(str)
rowdf['AWAY_INTERSEP'] = rowdf['AWAY_INTERSEP'].astype(str)
rowdf['HOME_FOUL'] = rowdf['HOME_FOUL'].astype(str)
rowdf['AWAY_FOUL'] = rowdf['AWAY_FOUL'].astype(str)
rowdf['HOME_YLLCARD'] = rowdf['HOME_YLLCARD'].astype(str)
rowdf['AWAY_YLLCARD'] = rowdf['AWAY_YLLCARD'].astype(str)
rowdf['HOME_REDCARD'] = rowdf['HOME_REDCARD'].astype(str)
rowdf['AWAY_REDCARD'] = rowdf['AWAY_REDCARD'].astype(str)
if rowdata != ():
li = np.array(rowdf)
li2 = li[0]
tu=""
for i in li2:
tu = tu + "'"+ i + "',"
if rowdata != ():
names = "K_NUMBER,K_DAY,HOME,AWAY,HOME_GOAL,AWAY_GOAL,HOME_SHOT,AWAY_SHOT,\
HOME_TSHOT,AWAY_TSHOT,HOME_PASHOT,AWAY_PASHOT,HOME_PATSHOT,AWAY_PATSHOT,\
HOME_DRB,AWAY_DRB,HOME_FSIN,AWAY_FSIN,HOME_PASS,AWAY_PASS,HOME_KPASS,AWAY_KPASS,\
HOME_FPASS,AWAY_FPASS,HOME_BPASS,AWAY_BPASS,HOME_LPASS,AWAY_LPASS,\
HOME_CROSS,AWAY_CROSS,HOME_PASSSU,AWAY_PASSSU,HOME_BATGRD,AWAY_BATGRD,\
HOME_BATAIR,AWAY_BATAIR,HOME_TACK,AWAY_TACK,HOME_INTERSEP,AWAY_INTERSEP,\
HOME_FOUL,AWAY_FOUL,HOME_YLLCARD,AWAY_YLLCARD,HOME_REDCARD,AWAY_REDCARD"
sqls= "INSERT INTO kleague ("+names+") VALUES ("+tu[:-1]+")"
# 만약 값이 여러개 들어가 있을 시
if rowdata != ():
if (len(rowdata)) > 1:
rowdf2 = rowdf1[1:]
rowdf2.K_NUMBER = rowdf2.K_NUMBER-1
for i in range(len(rowdf2.K_NUMBER)):
conn = pymysql.connect(host='125.176.85.154',port=3380, user='root', password='1234',
db='server', charset='utf8')
curs = conn.cursor()
sql3= "UPDATE rawdb SET K_NUMBER='%s' WHERE K_NUMBER ='%s'" % (str((int(rowdata[i+1][0])-1)),str((int(rowdata[i+1][0]))))
curs.execute(sql3)
conn.commit()
conn.close()
if rowdata != ():
conn = pymysql.connect(host='125.176.85.154',port=3380, user='root', password='1234',
db='server', charset='utf8')
curs = conn.cursor()
sql = sqls
curs.execute(sql)
conn.commit()
# Connection 닫기
conn.close()
rowdata = ()
## 승부예측
conn = pymysql.connect(host='125.176.85.154',port=3380, user='root', password='1234',
db='server', charset='utf8')
curs = conn.cursor()
sql = "select * from kleague" #2019년 제외 DB 불러오기
curs.execute(sql)
k_raw = curs.fetchall()
# Connection 닫기
conn.close()
results = pd.DataFrame(list(k_raw),columns=['K_NUMBER','K_DAY','HOME','AWAY',\
'HOME_GOAL','AWAY_GOAL','HOME_SHOT','AWAY_SHOT',\
'HOME_TSHOT','AWAY_TSHOT','HOME_PASHOT',\
'AWAY_PASHOT','HOME_PATSHOT','AWAY_PATSHOT',\
'HOME_DRB','AWAY_DRB','HOME_FSIN','AWAY_FSIN',\
'HOME_PASS','AWAY_PASS','HOME_KPASS','AWAY_KPASS'\
,'HOME_FPASS','AWAY_FPASS','HOME_BPASS',\
'AWAY_BPASS','HOME_LPASS','AWAY_LPASS',\
'HOME_CROSS','AWAY_CROSS','HOME_PASSSU',\
'AWAY_PASSSU','HOME_BATGRD','AWAY_BATGRD',\
'HOME_BATAIR','AWAY_BATAIR','HOME_TACK',\
'AWAY_TACK','HOME_INTERSEP','AWAY_INTERSEP',\
'HOME_FOUL','AWAY_FOUL','HOME_YLLCARD',\
'AWAY_YLLCARD','HOME_REDCARD','AWAY_REDCARD'])
results['K_NUMBER'] = results['K_NUMBER'].astype(int)
results['K_DAY'] = results['K_DAY'].astype(object)
results['HOME'] = results['HOME'].astype(object)
results['AWAY'] = results['AWAY'].astype(object)
results['HOME_GOAL'] = results['HOME_GOAL'].astype(int)
results['AWAY_GOAL'] = results['AWAY_GOAL'].astype(int)
results['HOME_SHOT'] = results['HOME_SHOT'].astype(int)
results['AWAY_SHOT'] = results['AWAY_SHOT'].astype(int)
results['HOME_TSHOT'] = results['HOME_TSHOT'].astype(int)
results['AWAY_TSHOT'] = results['AWAY_TSHOT'].astype(int)
results['HOME_PASHOT'] = results['HOME_PASHOT'].astype(int)
results['AWAY_PASHOT'] = results['AWAY_PASHOT'].astype(int)
results['HOME_PATSHOT'] = results['HOME_PATSHOT'].astype(int)
results['AWAY_PATSHOT'] = results['AWAY_PATSHOT'].astype(int)
results['HOME_DRB'] = results['HOME_DRB'].astype(int)
results['AWAY_DRB'] = results['AWAY_DRB'].astype(int)
results['HOME_FSIN'] = results['HOME_FSIN'].astype(int)
results['AWAY_FSIN'] = results['AWAY_FSIN'].astype(int)
results['HOME_PASS'] = results['HOME_PASS'].astype(int)
results['AWAY_PASS'] = results['AWAY_PASS'].astype(int)
results['HOME_KPASS'] = results['HOME_KPASS'].astype(int)
results['AWAY_KPASS'] = results['AWAY_KPASS'].astype(int)
results['HOME_FPASS'] = results['HOME_FPASS'].astype(int)
results['AWAY_FPASS'] = results['AWAY_FPASS'].astype(int)
results['HOME_BPASS'] = results['HOME_BPASS'].astype(int)
results['AWAY_BPASS'] = results['AWAY_BPASS'].astype(int)
results['HOME_LPASS'] = results['HOME_LPASS'].astype(int)
results['AWAY_LPASS'] = results['AWAY_LPASS'].astype(int)
results['HOME_CROSS'] = results['HOME_CROSS'].astype(int)
results['AWAY_CROSS'] = results['AWAY_CROSS'].astype(int)
results['HOME_PASSSU'] = results['HOME_PASSSU'].astype(float)
results['AWAY_PASSSU'] = results['AWAY_PASSSU'].astype(float)
results['HOME_BATGRD'] = results['HOME_BATGRD'].astype(int)
results['AWAY_BATGRD'] = results['AWAY_BATGRD'].astype(int)
results['HOME_BATAIR'] = results['HOME_BATAIR'].astype(int)
results['AWAY_BATAIR'] = results['AWAY_BATAIR'].astype(int)
results['HOME_TACK'] = results['HOME_TACK'].astype(int)
results['AWAY_TACK'] = results['AWAY_TACK'].astype(int)
results['HOME_INTERSEP'] = results['HOME_INTERSEP'].astype(int)
results['AWAY_INTERSEP'] = results['AWAY_INTERSEP'].astype(int)
results['HOME_FOUL'] = results['HOME_FOUL'].astype(int)
results['AWAY_FOUL'] = results['AWAY_FOUL'].astype(int)
results['HOME_YLLCARD'] = results['HOME_YLLCARD'].astype(int)
results['AWAY_YLLCARD'] = results['AWAY_YLLCARD'].astype(int)
results['HOME_REDCARD'] = results['HOME_REDCARD'].astype(int)
results['AWAY_REDCARD'] = results['AWAY_REDCARD'].astype(int)
results.HOME=results.HOME.str.capitalize()
results.AWAY=results.AWAY.str.capitalize()
results_backup = results
## 홈 골 예측
HOME_GOALS = results.HOME_GOAL
for i in range (len(HOME_GOALS)):
if results.HOME_GOAL[i]>3:
HOME_GOALS[i]=3
else:
HOME_GOALS[i] = results.HOME_GOAL[i]
results.loc[:,'HOME_GOALS'] = pd.Series(HOME_GOALS,index=results.index)
y, X = dmatrices('HOME_GOALS ~ HOME_TSHOT + HOME_PATSHOT +\
HOME_FSIN + HOME_PASS + HOME_BPASS + \
HOME_CROSS + HOME_PASSSU + HOME_BATAIR',results, return_type="dataframe" )
y = np.ravel(y)
RegModel = LogisticRegression()
RegModel = RegModel.fit(X,y)
RegModel.score(X,y)
dfCoef = pd.DataFrame({'coef':X.columns, 'weight':RegModel.coef_[0]})
def homestats3(hometeam):
home_count = []
home_round = []
for i in range (len(results['HOME'])):
if results.HOME[i] == hometeam:
home_count.append(results.index[i])
for i in range(3):
home_round.append(max(home_count))
home_count.remove(max(home_count))
return(home_round)
def homestats5(hometeam):
home_count = []
home_round = []
for i in range (len(results['HOME'])):
if results.HOME[i] == hometeam:
home_count.append(results.index[i])
for i in range(5):
home_round.append(max(home_count))
home_count.remove(max(home_count))
return(home_round)
def homecount(hometeam):
home3 = results.loc[homestats3(hometeam)]
home3 = home3.reset_index()
HOME_TSHOT3 = (home3.HOME_TSHOT[0]+home3.HOME_TSHOT[1]+home3.HOME_TSHOT[2])/3
HOME_PATSHOT3 = (home3.HOME_PATSHOT[0]+home3.HOME_PATSHOT[1]+home3.HOME_PATSHOT[2])/3
HOME_FSIN3 = (home3.HOME_FSIN[0]+home3.HOME_FSIN[1]+home3.HOME_FSIN[2])/3
HOME_PASS3 = (home3.HOME_PASS[0]+home3.HOME_PASS[1]+home3.HOME_PASS[2])/3
HOME_BPASS3 = (home3.HOME_BPASS[0]+home3.HOME_BPASS[1]+home3.HOME_BPASS[2])/3
HOME_CROSS3 = (home3.HOME_CROSS[0]+home3.HOME_CROSS[1]+home3.HOME_CROSS[2])/3
HOME_PASSSU3 = (home3.HOME_PASSSU[0]+home3.HOME_PASSSU[1]+home3.HOME_PASSSU[2])/3
HOME_BATAIR3 = (home3.HOME_BATAIR[0]+home3.HOME_BATAIR[1]+home3.HOME_BATAIR[2])/3
home5 = results.loc[homestats5(hometeam)]
home5 = home5.reset_index()
HOME_TSHOT5 = (home5.HOME_TSHOT[0]+home5.HOME_TSHOT[1]+home5.HOME_TSHOT[2]+home5.HOME_TSHOT[3]+home5.HOME_TSHOT[4])/5
HOME_PATSHOT5 = (home5.HOME_PATSHOT[0]+home5.HOME_PATSHOT[1]+home5.HOME_PATSHOT[2]+home5.HOME_PATSHOT[3]+home5.HOME_PATSHOT[4])/5
HOME_FSIN5 = (home5.HOME_FSIN[0]+home5.HOME_FSIN[1]+home5.HOME_FSIN[2]+home5.HOME_FSIN[3]+home5.HOME_FSIN[4])/5
HOME_PASS5 = (home5.HOME_PASS[0]+home5.HOME_PASS[1]+home5.HOME_PASS[2]+home5.HOME_PASS[3]+home5.HOME_PASS[4])/5
HOME_BPASS5 = (home5.HOME_BPASS[0]+home5.HOME_BPASS[1]+home5.HOME_BPASS[2]+home5.HOME_BPASS[3]+home5.HOME_BPASS[4])/5
HOME_CROSS5 = (home5.HOME_CROSS[0]+home5.HOME_CROSS[1]+home5.HOME_CROSS[2]+home5.HOME_CROSS[3]+home5.HOME_CROSS[4])/5
HOME_PASSSU5 = (home5.HOME_PASSSU[0]+home5.HOME_PASSSU[1]+home5.HOME_PASSSU[2]+home5.HOME_PASSSU[3]+home5.HOME_PASSSU[4])/5
HOME_BATAIR5 = (home5.HOME_BATAIR[0]+home5.HOME_BATAIR[1]+home5.HOME_BATAIR[2]+home5.HOME_BATAIR[3]+home5.HOME_BATAIR[4])/5
HOME_TSHOT = (HOME_TSHOT3 + HOME_TSHOT5)/2
HOME_PATSHOT = (HOME_PATSHOT3 + HOME_PATSHOT5)/2
HOME_FSIN = (HOME_FSIN3 + HOME_FSIN5)/2
HOME_PASS = (HOME_PASS3 + HOME_PASS5)/2
HOME_BPASS = (HOME_BPASS3 + HOME_BPASS5)/2
HOME_CROSS = (HOME_CROSS3 + HOME_CROSS5)/2
HOME_PASSSU = (HOME_PASSSU3 + HOME_PASSSU5)/2
HOME_BATAIR = (HOME_BATAIR3 + HOME_BATAIR5)/2
HOME_count = RegModel.predict_proba([[0,HOME_TSHOT,HOME_PATSHOT,HOME_FSIN,HOME_PASS,HOME_BPASS,HOME_CROSS,HOME_PASSSU,HOME_BATAIR]])
return(HOME_count)
## 어웨이 골 예측
AWAY_GOALS = results.AWAY_GOAL
for i in range (len(AWAY_GOALS)):
if results.AWAY_GOAL[i]>3:
AWAY_GOALS[i]=3
else:
AWAY_GOALS[i] = results.AWAY_GOAL[i]
results.loc[:,'AWAY_GOALS'] = pd.Series(AWAY_GOALS,index=results.index)
y2, X2 = dmatrices('AWAY_GOALS ~ AWAY_SHOT + AWAY_TSHOT + \
AWAY_PATSHOT + AWAY_FSIN + AWAY_PASS + AWAY_BPASS',results, return_type="dataframe" )
y2 = np.ravel(y2)
RegModel2 = LogisticRegression()
RegModel2 = RegModel2.fit(X2,y2)
RegModel2.score(X2,y2)
dfCoef2 = pd.DataFrame({'coef':X2.columns, 'weight':RegModel2.coef_[0]})
def awaystats3(awayteam):
away_count = []
away_round = []
for i in range (len(results['AWAY'])):
if results.AWAY[i] == awayteam:
away_count.append(results.index[i])
for i in range(3):
away_round.append(max(away_count))
away_count.remove(max(away_count))
return(away_round)
def awaystats5(awayteam):
away_count = []
away_round = []
for i in range (len(results['AWAY'])):
if results.AWAY[i] == awayteam:
away_count.append(results.index[i])
for i in range(5):
away_round.append(max(away_count))
away_count.remove(max(away_count))
return(away_round)
def awaycount(awayteam):
away3 = results.loc[awaystats3(awayteam)]
away3 = away3.reset_index()
AWAY_SHOT3 = (away3.AWAY_SHOT[0]+away3.AWAY_SHOT[1]+away3.AWAY_SHOT[2])/3
AWAY_TSHOT3 = (away3.AWAY_TSHOT[0]+away3.AWAY_TSHOT[1]+away3.AWAY_TSHOT[2])/3
AWAY_PATSHOT3 = (away3.AWAY_PATSHOT[0]+away3.AWAY_PATSHOT[1]+away3.AWAY_PATSHOT[2])/3
AWAY_FSIN3 = (away3.AWAY_FSIN[0]+away3.AWAY_FSIN[1]+away3.AWAY_FSIN[2])/3
AWAY_PASS3 = (away3.AWAY_PASS[0]+away3.AWAY_PASS[1]+away3.AWAY_PASS[2])/3
AWAY_BPASS3 = (away3.AWAY_BPASS[0]+away3.AWAY_BPASS[1]+away3.AWAY_BPASS[2])/3
away5 = results.loc[awaystats5(awayteam)]
away5 = away5.reset_index()
AWAY_SHOT5 = (away5.AWAY_SHOT[0]+away5.AWAY_SHOT[1]+away5.AWAY_SHOT[2]+away5.AWAY_SHOT[3]+away5.AWAY_SHOT[4])/5
AWAY_TSHOT5 = (away5.AWAY_TSHOT[0]+away5.AWAY_TSHOT[1]+away5.AWAY_TSHOT[2]+away5.AWAY_TSHOT[3]+away5.AWAY_TSHOT[4])/5
AWAY_PATSHOT5 = (away5.AWAY_PATSHOT[0]+away5.AWAY_PATSHOT[1]+away5.AWAY_PATSHOT[2]+away5.AWAY_PATSHOT[3]+away5.AWAY_PATSHOT[4])/5
AWAY_FSIN5 = (away5.AWAY_FSIN[0]+away5.AWAY_FSIN[1]+away5.AWAY_FSIN[2]+away5.AWAY_FSIN[3]+away5.AWAY_FSIN[4])/5
AWAY_PASS5 = (away5.AWAY_PASS[0]+away5.AWAY_PASS[1]+away5.AWAY_PASS[2]+away5.AWAY_PASS[3]+away5.AWAY_PASS[4])/5
AWAY_BPASS5 = (away5.AWAY_BPASS[0]+away5.AWAY_BPASS[1]+away5.AWAY_BPASS[2]+away5.AWAY_BPASS[3]+away5.AWAY_BPASS[4])/5
AWAY_SHOT = (AWAY_SHOT3 + AWAY_SHOT5)/2
AWAY_TSHOT = (AWAY_TSHOT3 + AWAY_TSHOT5)/2
AWAY_PATSHOT = (AWAY_PATSHOT3 + AWAY_PATSHOT5)/2
AWAY_FSIN = (AWAY_FSIN3 + AWAY_FSIN5)/2
AWAY_PASS = (AWAY_PASS3 + AWAY_PASS5)/2
AWAY_BPASS = (AWAY_BPASS3 + AWAY_BPASS5)/2
AWAY_count = RegModel2.predict_proba([[0,AWAY_SHOT,AWAY_TSHOT,AWAY_PATSHOT,AWAY_FSIN,AWAY_PASS,AWAY_BPASS]])
return(AWAY_count)
try:
if home == home_name.capitalize() and away == away_name.capitalize():
future =()
else :
home = home_name.capitalize()
away = away_name.capitalize()
future = [[knumber,home,away]]
except:
future =()
pass
try:
if future != ():
home_score = homecount(home)
away_score = awaycount(away)
HOME = home
AWAY = away
H0 = (home_score[0][0])
H1 = (home_score[0][1])
H2 = (home_score[0][2])
H3 = (home_score[0][3])
A0 = (away_score[0][0])
A1 = (away_score[0][1])
A2 = (away_score[0][2])
A3 = (away_score[0][3])
Home_data = [H0,H1,H2,H3]
h_goal = Home_data.index(max(Home_data))
Away_data = [A0,A1,A2,A3]
a_goal = Away_data.index(max(Away_data))
H0 = str(round(H0,4))
H1 = str(round(H1,4))
H2 = str(round(H2,4))
H3 = str(round(H3,4))
A0 = str(round(A0,4))
A1 = str(round(A1,4))
A2 = str(round(A2,4))
A3 = str(round(A3,4))
if h_goal>a_goal:
HDA = 'H'
elif h_goal==a_goal:
HDA = 'D'
elif h_goal<a_goal:
HDA = 'A'
else:
HDA = 'error'
h_goal = str(h_goal)
a_goal = str(a_goal)
except:
future=()
pass
if future != ():
conn = pymysql.connect(host='125.176.85.154',port=3380, user='root', password='1234',
db='server', charset='utf8')
curs = conn.cursor()
sql = "select * from resultdb" # 경기 번호불러오기 위한 데이터
curs.execute(sql)
raw = curs.fetchall()
if raw != ():
num = str((int(raw[-1][0]))+1)
else:
num = '0'
li = [num,HOME,AWAY,HDA,h_goal,a_goal,H0,H1,H2,H3,A0,A1,A2,A3]
tu=""
for i in li:
tu = tu + "'"+ i + "',"
if future != ():
names = "K_NUMBER,HOME,AWAY,WIN,HOME_GOAL,AWAY_GOAL,H0,H1,H2,H3,A0,A1,A2,A3"
sqls= "INSERT INTO resultdb ("+names+") VALUES ("+tu[:-1]+")"
if future != ():
conn = pymysql.connect(host='125.176.85.154',port=3380, user='root', password='1234',
db='server', charset='utf8')
curs = conn.cursor()
sql = sqls
curs.execute(sql)
conn.commit()
# Connection 닫기
conn.close()
rowdata = ()