形式
Plain text
投稿日時
2019-10-17 04:23
公開期間
無期限
  1. #2019-10-16
  2. # http://chess-results.com/tnr478041.aspx?lan=1&art=5&flag=30
  3. set -e
  4. #CONSTANT uppercase
  5. WORKDIR_PATH='/Users/pierfrancesco.aiello/pier.root/pier_work_settings/local/scripting/shell/chess_results/scraping_swiss_stats/chess_results'
  6. TEMP_FILE_PATH="${WORKDIR_PATH}/tmp_results_from_pgn"
  7. PGN_TO_PROCESS_NAME='397824.pgn'
  8. PGN_TO_PROCESS_PATH="${WORKDIR_PATH}/${PGN_TO_PROCESS_NAME}"
  9. SQLITE3_DB_PATH="${WORKDIR_PATH}/strong_swiss_stats.db"
  10. #GLOBAL_VAR uppercase
  11. CSV_OUTPUT_PATH="${WORKDIR_PATH}/pgn_to_csv.${PGN_TO_PROCESS_NAME}.csv" #to keep csv files if something happens
  12. READ_EVENT=""
  13. READ_DATE=""
  14. READ_WELO=""
  15. READ_BELO=""
  16. READ_RESULT=""
  17. NEXT_LINE=""
  18. CSV_TO_SQL_CMD=".mode csv
  19. .import ${CSV_OUTPUT_PATH} results" #need multiline
  20. #
  21. #
  22. # functions
  23. function extract_second_element_line(){
  24. local input_line="${1}"
  25. echo "${input_line}" | grep -o '".*"' | tr -d '"'
  26. #echo "${input_line}" | grep -o '".*"' #with or without double quotes makes a difference for comparisons.
  27. }
  28. #wget -O chess_results.html http://chess-results.com/tnr478041.aspx?lan=1&art=5&flag=30
  29. #not that immediate to scrape. Copy and paste would help but then it is more manual.
  30. # 2019-10-16
  31. # let's see the PGN (of course the problem can be that not all tournaments have PGNs)
  32. # Yes indeed. More tournaments have the crosstable than the tournaments that have the PGN.
  33. # though the important tournaments seems to have the PNG as well.
  34. # even strong ones have no PGN. http://chess-results.com/tnr307271.aspx?lan=1&art=4&turdet=YES&flag=30
  35. #awk '/^\[/ { print} ' trn478041.pgn #get the metadata from each game, filterning the games
  36. #awk '/^\[/ { print} ' trn478041.pgn | tr -d '[]' #takes away the brackets
  37. #cat pgn_without_brackets.tmp | grep -o '".*"' | tr -d '"' # picks the second argument
  38. awk '/^\[/ { print} ' $PGN_TO_PROCESS_PATH | tr -d '[]' > $TEMP_FILE_PATH
  39. #takes away the brackets
  40. #echo "CREATE TABLE IF NOT EXISTS results(event text, date text, welo integer, belo integer, result integer)" | sqlite3 strong_swiss_stats.db
  41. echo "CREATE TABLE IF NOT EXISTS results(event text, date text, welo integer, belo integer, result integer)" | sqlite3 $SQLITE3_DB_PATH
  42. #create the csv
  43. echo 'event,0.0.0,0,0,-1' > $CSV_OUTPUT_PATH
  44. while read -r NEXT_LINE
  45. do
  46. if [[ "${NEXT_LINE}" =~ 'Event ' ]]; then
  47. READ_EVENT=$( extract_second_element_line "${NEXT_LINE}")
  48. #echo $READ_EVENT
  49. elif [[ "${NEXT_LINE}" =~ 'Date ' ]]; then
  50. READ_DATE=$( extract_second_element_line "${NEXT_LINE}")
  51. elif [[ "${NEXT_LINE}" =~ 'WhiteElo ' ]]; then
  52. READ_WELO=$( extract_second_element_line "${NEXT_LINE}")
  53. elif [[ "${NEXT_LINE}" =~ 'BlackElo ' ]]; then
  54. READ_BELO=$( extract_second_element_line "${NEXT_LINE}")
  55. elif [[ "${NEXT_LINE}" =~ 'Result ' ]]; then
  56. READ_RESULT=$( extract_second_element_line "${NEXT_LINE}")
  57. if [[ "${READ_RESULT}" == "1-0" ]]; then #case is not always so readable
  58. READ_RESULT=2 #white win
  59. elif [[ "${READ_RESULT}" == "1/2-1/2" ]]; then
  60. READ_RESULT=1 #draw
  61. elif [[ "${READ_RESULT}" == "0-1" ]]; then
  62. READ_RESULT=0 #black wins
  63. else
  64. echo "error in result ${READ_RESULT}" && exit 1
  65. fi
  66. echo "${READ_EVENT},${READ_DATE},${READ_WELO},${READ_BELO},${READ_RESULT}" >> $CSV_OUTPUT_PATH
  67. fi
  68. done < $TEMP_FILE_PATH
  69. echo "${CSV_TO_SQL_CMD}" | sqlite3 $SQLITE3_DB_PATH
  70. # 2019-10-16
  71. # with this processed data
  72. #total 15792
  73. #-rw-rw-rw-@ 1 pierfrancesco.aiello staff 370254 16 Okt 20:59 266418.pgn
  74. #-rw-rw-rw-@ 1 pierfrancesco.aiello staff 2575435 16 Okt 20:56 280959.pgn
  75. #-rw-rw-rw-@ 1 pierfrancesco.aiello staff 1616954 16 Okt 20:53 385685.pgn
  76. #-rw-rw-rw-@ 1 pierfrancesco.aiello staff 236086 16 Okt 21:02 397824.pgn
  77. #-rw-rw-rw-@ 1 pierfrancesco.aiello staff 498127 16 Okt 21:01 442666.pgn
  78. #-rw-r--r-- 1 pierfrancesco.aiello staff 19327 16 Okt 21:00 pgn_to_csv.266418.pgn.csv
  79. #-rw-r--r-- 1 pierfrancesco.aiello staff 73574 16 Okt 20:58 pgn_to_csv.280959.pgn.csv
  80. #-rw-r--r-- 1 pierfrancesco.aiello staff 61931 16 Okt 20:54 pgn_to_csv.385685.pgn.csv
  81. #-rw-r--r-- 1 pierfrancesco.aiello staff 23307 16 Okt 21:04 pgn_to_csv.397824.pgn.csv
  82. #-rw-r--r-- 1 pierfrancesco.aiello staff 29535 16 Okt 21:02 pgn_to_csv.442666.pgn.csv
  83. #-rw-r--r-- 1 pierfrancesco.aiello staff 25311 16 Okt 20:46 pgn_to_csv.trn478041.pgn.csv
  84. #-rw-r--r-- 1 pierfrancesco.aiello staff 140022 16 Okt 20:12 pgn_without_brackets.tmp
  85. #-rw-r--r-- 1 pierfrancesco.aiello staff 249856 16 Okt 21:04 strong_swiss_stats.db
  86. #-rw-r--r-- 1 pierfrancesco.aiello staff 83810 16 Okt 21:04 tmp_results_from_pgn
  87. #-rw-rw-rw-@ 1 pierfrancesco.aiello staff 1935096 16 Okt 19:03 trn478041.pgn
  88. #sqlite> select count(*) from results where abs(welo-belo)<25;
  89. #71
  90. #sqlite> select count(*) from results where result=1 and abs(welo-belo)<25 union select count(*) from results where result<>1 and abs(welo-belo)<25;
  91. #35
  92. #36
  93. #sqlite> select count(*) from results where result=1 and abs(welo-belo)<50 union select count(*) from results where result<>1 and abs(welo-belo)<50;
  94. #119
  95. #124
  96. #sqlite> select count(*) from results where result=1 and abs(welo-belo)<75 union select count(*) from results where result<>1 and abs(welo-belo)<75;
  97. #283
  98. #294
  99. #sqlite> select count(*) from results where result=1 and abs(welo-belo)<100 union select count(*) from results where result<>1 and abs(welo-belo)<100;
  100. #525
  101. #543
  102. #sqlite> select count(*) from results where result=1 and abs(welo-belo)<150 union select count(*) from results where result<>1 and abs(welo-belo)<150;
  103. #962
  104. #1133
  105. #sqlite> select count(*) from results where result=1 and abs(welo-belo)<200 union select count(*) from results where result<>1 and abs(welo-belo)<200;
  106. #1172
  107. #1494
  108. #sqlite> select count(*) from results where result=1 and abs(welo-belo)<250 union select count(*) from results where result<>1 and abs(welo-belo)<250;
  109. #1254
  110. #1687
  111. #sqlite> select count(*) from results where result=1 and abs(welo-belo)<300 union select count(*) from results where result<>1 and abs(welo-belo)<300;
  112. #1311
  113. #1830
  114. # sqlite> select count(*) from results where welo>2600 and belo>2600;
  115. # 491
  116. # sqlite> select count(*) from results where welo>2600 and belo>2600 and result=1;
  117. # 285
  118. # sqlite> select count(*) from results where welo>2500 and belo>2500;
  119. # 1382
  120. # sqlite> select count(*) from results where welo>2500 and belo>2500 and result=1;
  121. # 719
  122. # games with the 2700
  123. # sqlite> select count(*) from results where welo>2700 or belo>2700;
  124. # 454
  125. # wins for the 2700
  126. # sqlite> select count(*) from results where (welo>2700 and result=2) or (belo>2700 and result=0);
  127. # 84
  128. # sqlite> select count(*) from results where welo>2600 or belo>2600;
  129. # 2005
  130. # wins for the 2600+
  131. # sqlite> select count(*) from results where (welo>2600 and result=2) or (belo>2600 and result=0);
  132. # 564
ダウンロード 印刷用表示

このコピペの URL

JavaScript での埋め込み

iframe での埋め込み

元のテキスト