• R/O
  • HTTP
  • SSH
  • HTTPS

pg_hint_plan: コミット

first release


コミットメタ情報

リビジョンa257715e4f47510944c06c9f11805bcc88824e3f (tree)
日時2018-06-08 14:11:06
作者Kyotaro Horiguchi <horiguchi.kyotaro@lab....>
コミッターKyotaro Horiguchi

ログメッセージ

Took in changes of pg_stat_statements.

Took in the following commits on pg_stat_statements. These commits
change unused parts of the file, so they don't affect the behavior of
pg_hint_plan.

4f37d09169 Avoid unlikely data-loss scenarios due to rename() without fsync.

93840f96c7 Improve contrib/pg_stat_statements' handling of garbage collection failure.

変更サマリ

差分

--- a/pg_stat_statements.c
+++ b/pg_stat_statements.c
@@ -171,7 +171,7 @@ typedef struct pgssEntry
171171 pgssHashKey key; /* hash key of entry - MUST BE FIRST */
172172 Counters counters; /* the statistics for this query */
173173 Size query_offset; /* query text offset in external file */
174- int query_len; /* # of valid bytes in query string */
174+ int query_len; /* # of valid bytes in query string, or -1 */
175175 int encoding; /* query text encoding */
176176 slock_t mutex; /* protects the counters only */
177177 } pgssEntry;
@@ -745,11 +745,7 @@ pgss_shmem_shutdown(int code, Datum arg)
745745 /*
746746 * Rename file into place, so we atomically replace any old one.
747747 */
748- if (rename(PGSS_DUMP_FILE ".tmp", PGSS_DUMP_FILE) != 0)
749- ereport(LOG,
750- (errcode_for_file_access(),
751- errmsg("could not rename pg_stat_statement file \"%s\": %m",
752- PGSS_DUMP_FILE ".tmp")));
748+ (void) durable_rename(PGSS_DUMP_FILE ".tmp", PGSS_DUMP_FILE, LOG);
753749
754750 /* Unlink query-texts file; it's not needed while shutdown */
755751 unlink(PGSS_TEXT_FILE);
@@ -1649,7 +1645,8 @@ entry_cmp(const void *lhs, const void *rhs)
16491645 }
16501646
16511647 /*
1652- * Deallocate least used entries.
1648+ * Deallocate least-used entries.
1649+ *
16531650 * Caller must hold an exclusive lock on pgss->lock.
16541651 */
16551652 static void
@@ -1660,17 +1657,27 @@ entry_dealloc(void)
16601657 pgssEntry *entry;
16611658 int nvictims;
16621659 int i;
1663- Size totlen = 0;
1660+ Size tottextlen;
1661+ int nvalidtexts;
16641662
16651663 /*
16661664 * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
16671665 * While we're scanning the table, apply the decay factor to the usage
1668- * values.
1666+ * values, and update the mean query length.
1667+ *
1668+ * Note that the mean query length is almost immediately obsolete, since
1669+ * we compute it before not after discarding the least-used entries.
1670+ * Hopefully, that doesn't affect the mean too much; it doesn't seem worth
1671+ * making two passes to get a more current result. Likewise, the new
1672+ * cur_median_usage includes the entries we're about to zap.
16691673 */
16701674
16711675 entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *));
16721676
16731677 i = 0;
1678+ tottextlen = 0;
1679+ nvalidtexts = 0;
1680+
16741681 hash_seq_init(&hash_seq, pgss_hash);
16751682 while ((entry = hash_seq_search(&hash_seq)) != NULL)
16761683 {
@@ -1680,20 +1687,27 @@ entry_dealloc(void)
16801687 entry->counters.usage *= STICKY_DECREASE_FACTOR;
16811688 else
16821689 entry->counters.usage *= USAGE_DECREASE_FACTOR;
1683- /* Accumulate total size, too. */
1684- totlen += entry->query_len + 1;
1690+ /* In the mean length computation, ignore dropped texts. */
1691+ if (entry->query_len >= 0)
1692+ {
1693+ tottextlen += entry->query_len + 1;
1694+ nvalidtexts++;
1695+ }
16851696 }
16861697
1698+ /* Sort into increasing order by usage */
16871699 qsort(entries, i, sizeof(pgssEntry *), entry_cmp);
16881700
1701+ /* Record the (approximate) median usage */
16891702 if (i > 0)
1690- {
1691- /* Record the (approximate) median usage */
16921703 pgss->cur_median_usage = entries[i / 2]->counters.usage;
1693- /* Record the mean query length */
1694- pgss->mean_query_len = totlen / i;
1695- }
1704+ /* Record the mean query length */
1705+ if (nvalidtexts > 0)
1706+ pgss->mean_query_len = tottextlen / nvalidtexts;
1707+ else
1708+ pgss->mean_query_len = ASSUMED_LENGTH_INIT;
16961709
1710+ /* Now zap an appropriate fraction of lowest-usage entries */
16971711 nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
16981712 nvictims = Min(nvictims, i);
16991713
@@ -1836,7 +1850,7 @@ qtext_load_file(Size *buffer_size)
18361850 }
18371851
18381852 /* Allocate buffer; beware that off_t might be wider than size_t */
1839- if (stat.st_size <= MaxAllocSize)
1853+ if (stat.st_size <= MaxAllocHugeSize)
18401854 buf = (char *) malloc(stat.st_size);
18411855 else
18421856 buf = NULL;
@@ -1844,7 +1858,9 @@ qtext_load_file(Size *buffer_size)
18441858 {
18451859 ereport(LOG,
18461860 (errcode(ERRCODE_OUT_OF_MEMORY),
1847- errmsg("out of memory")));
1861+ errmsg("out of memory"),
1862+ errdetail("Could not allocate enough memory to read pg_stat_statement file \"%s\".",
1863+ PGSS_TEXT_FILE)));
18481864 CloseTransientFile(fd);
18491865 return NULL;
18501866 }
@@ -1946,13 +1962,17 @@ need_gc_qtexts(void)
19461962 * occur in the foreseeable future.
19471963 *
19481964 * The caller must hold an exclusive lock on pgss->lock.
1965+ *
1966+ * At the first sign of trouble we unlink the query text file to get a clean
1967+ * slate (although existing statistics are retained), rather than risk
1968+ * thrashing by allowing the same problem case to recur indefinitely.
19491969 */
19501970 static void
19511971 gc_qtexts(void)
19521972 {
19531973 char *qbuffer;
19541974 Size qbuffer_size;
1955- FILE *qfile;
1975+ FILE *qfile = NULL;
19561976 HASH_SEQ_STATUS hash_seq;
19571977 pgssEntry *entry;
19581978 Size extent;
@@ -1967,12 +1987,15 @@ gc_qtexts(void)
19671987 return;
19681988
19691989 /*
1970- * Load the old texts file. If we fail (out of memory, for instance) just
1971- * skip the garbage collection.
1990+ * Load the old texts file. If we fail (out of memory, for instance),
1991+ * invalidate query texts. Hopefully this is rare. It might seem better
1992+ * to leave things alone on an OOM failure, but the problem is that the
1993+ * file is only going to get bigger; hoping for a future non-OOM result is
1994+ * risky and can easily lead to complete denial of service.
19721995 */
19731996 qbuffer = qtext_load_file(&qbuffer_size);
19741997 if (qbuffer == NULL)
1975- return;
1998+ goto gc_fail;
19761999
19772000 /*
19782001 * We overwrite the query texts file in place, so as to reduce the risk of
@@ -2007,6 +2030,7 @@ gc_qtexts(void)
20072030 /* Trouble ... drop the text */
20082031 entry->query_offset = 0;
20092032 entry->query_len = -1;
2033+ /* entry will not be counted in mean query length computation */
20102034 continue;
20112035 }
20122036
@@ -2091,7 +2115,36 @@ gc_fail:
20912115 entry->query_len = -1;
20922116 }
20932117
2094- /* Seems like a good idea to bump the GC count even though we failed */
2118+ /*
2119+ * Destroy the query text file and create a new, empty one
2120+ */
2121+ (void) unlink(PGSS_TEXT_FILE);
2122+ qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
2123+ if (qfile == NULL)
2124+ ereport(LOG,
2125+ (errcode_for_file_access(),
2126+ errmsg("could not write new pg_stat_statement file \"%s\": %m",
2127+ PGSS_TEXT_FILE)));
2128+ else
2129+ FreeFile(qfile);
2130+
2131+ /* Reset the shared extent pointer */
2132+ pgss->extent = 0;
2133+
2134+ /* Reset mean_query_len to match the new state */
2135+ pgss->mean_query_len = ASSUMED_LENGTH_INIT;
2136+
2137+ /*
2138+ * Bump the GC count even though we failed.
2139+ *
2140+ * This is needed to make concurrent readers of file without any lock on
2141+ * pgss->lock notice existence of new version of file. Once readers
2142+ * subsequently observe a change in GC count with pgss->lock held, that
2143+ * forces a safe reopen of file. Writers also require that we bump here,
2144+ * of course. (As required by locking protocol, readers and writers don't
2145+ * trust earlier file contents until gc_count is found unchanged after
2146+ * pgss->lock acquired in shared or exclusive mode respectively.)
2147+ */
20952148 record_gc_qtexts();
20962149 }
20972150
旧リポジトリブラウザで表示