I tried to build a site with about 3k markdown pages using Gatsby, and the build fails with a heap out of memory error. I searched and found the benchmark that is mentioned for sites with a large number of markdown pages, but that benchmark has no GraphQL query in its gatsby-node.js and does not use as many plugins as my application.
Is that the cause of the performance issue? If an application uses a large number of markdown pages, does it need to be configured with minimal plugins and avoid GraphQL?
How can I use a large number of markdown pages together with the plugins and config I need in Gatsby?
System:
OS: Windows 10
CPU: x64 Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz
Binaries:
Yarn: 1.3.2 - C:\Users\ajithr\AppData\Roaming\npm\yarn.CMD
npm: 6.4.1 - C:\Program Files\nodejs\npm.CMD
Browsers:
Edge: 42.17134.1.0
npmPackages:
gatsby: ^2.0.0 => 2.0.7
gatsby-plugin-catch-links: ^2.0.2 => 2.0.2
gatsby-plugin-feed: ^2.0.5 => 2.0.5
gatsby-plugin-google-analytics: ^2.0.5 => 2.0.6
gatsby-plugin-manifest: ^2.0.2 => 2.0.2
gatsby-plugin-nprogress: ^2.0.5 => 2.0.5
gatsby-plugin-offline: ^2.0.5 => 2.0.5
gatsby-plugin-react-helmet: ^3.0.0 => 3.0.0
gatsby-plugin-sharp: ^2.0.5 => 2.0.5
gatsby-plugin-sitemap: ^2.0.1 => 2.0.1
gatsby-plugin-styled-components: ^3.0.0 => 3.0.0
gatsby-plugin-typography: ^2.2.0 => 2.2.0
gatsby-remark-autolink-headers: ^2.0.6 => 2.0.6
gatsby-remark-copy-linked-files: ^2.0.5 => 2.0.5
gatsby-remark-images: ^2.0.1 => 2.0.1
gatsby-remark-prismjs: ^3.0.0 => 3.0.0
gatsby-remark-responsive-iframe: ^2.0.5 => 2.0.5
gatsby-remark-smartypants: ^2.0.5 => 2.0.5
gatsby-source-filesystem: ^2.0.1 => 2.0.1
gatsby-transformer-json: ^2.1.1 => 2.1.1
gatsby-transformer-remark: ^2.1.1 => 2.1.3
gatsby-transformer-sharp: ^2.1.1 => 2.1.1
gatsby-config.js:
const config = require("./src/templates/site-config");
const pathPrefix = config.pathPrefix === "/" ? "" : config.pathPrefix;
module.exports = {
siteMetadata: {
siteUrl: config.siteUrl + pathPrefix
},
pathPrefix: config.pathPrefix,
plugins: [
{
resolve: `gatsby-source-filesystem`,
options: {
path: `${__dirname}/src/pages`,
name: 'pages',
ignore: [`templates/**/`]
},
},
{
resolve: `gatsby-transformer-remark`,
options: {
plugins: [
{
resolve: `gatsby-remark-images`,
options: {
maxWidth: 590,
},
},
{
resolve: `gatsby-remark-responsive-iframe`,
options: {
wrapperStyle: `margin-bottom: 1.0725rem`,
},
},
'gatsby-remark-prismjs',
'gatsby-remark-copy-linked-files',
'gatsby-remark-smartypants',
'gatsby-remark-autolink-headers'
],
},
},
`gatsby-transformer-sharp`,
`gatsby-plugin-sharp`,
`gatsby-plugin-catch-links`,
`gatsby-plugin-sitemap`,
{
resolve: `gatsby-plugin-google-analytics`,
options: {
trackingId: config.googleAnalyticsID,
},
},
{
resolve: "gatsby-plugin-nprogress",
options: {
color: config.themeColor
}
},
`gatsby-plugin-feed`,
{
resolve: `gatsby-plugin-manifest`,
options: {
name: config.siteTitle,
short_name: config.siteTitle,
description: config.siteDescription,
start_url: config.pathPrefix,
background_color: config.backgroundColor,
theme_color: config.themeColor,
display: "minimal-ui",
icons: [
{
src: "/logos/logo-192x192.png",
sizes: "192x192",
type: "image/png"
},
{
src: "/logos/logo-512x512.png",
sizes: "512x512",
type: "image/png"
}
]
}
},
`gatsby-plugin-offline`,
`gatsby-plugin-styled-components`,
`gatsby-plugin-react-helmet`,
{
resolve: 'gatsby-plugin-typography',
options: {
pathToConfigModule: 'src/utils/typography',
},
},
],
}
package.json:
{
"name": "gatsby-starter-blog",
"description": "Starter Gatsby Blog",
"version": "1.0.0",
"author": "Kyle Mathews <[email protected]>",
"bugs": {
"url": "https://github.com/gatsbyjs/gatsby-starter-blog/issues"
},
"dependencies": {
"babel-plugin-styled-components": "^1.7.1",
"core-js": "^2.5.7",
"gatsby": "^2.0.0",
"gatsby-plugin-catch-links": "^2.0.2",
"gatsby-plugin-feed": "^2.0.5",
"gatsby-plugin-google-analytics": "^2.0.5",
"gatsby-plugin-manifest": "^2.0.2",
"gatsby-plugin-nprogress": "^2.0.5",
"gatsby-plugin-offline": "^2.0.5",
"gatsby-plugin-react-helmet": "^3.0.0",
"gatsby-plugin-sharp": "^2.0.5",
"gatsby-plugin-sitemap": "^2.0.1",
"gatsby-plugin-styled-components": "^3.0.0",
"gatsby-plugin-typography": "^2.2.0",
"gatsby-remark-autolink-headers": "^2.0.6",
"gatsby-remark-copy-linked-files": "^2.0.5",
"gatsby-remark-images": "^2.0.1",
"gatsby-remark-prismjs": "^3.0.0",
"gatsby-remark-responsive-iframe": "^2.0.5",
"gatsby-remark-smartypants": "^2.0.5",
"gatsby-source-filesystem": "^2.0.1",
"gatsby-transformer-json": "^2.1.1",
"gatsby-transformer-remark": "^2.1.1",
"gatsby-transformer-sharp": "^2.1.1",
"lodash": "^4.17.11",
"prismjs": "^1.15.0",
"react": "^16.5.1",
"react-dom": "^16.5.1",
"react-helmet": "^5.2.0",
"react-router-dom": "^4.3.1",
"react-typography": "^0.16.13",
"styled-components": "^3.4.9",
"typeface-merriweather": "0.0.43",
"typeface-montserrat": "0.0.43",
"typography": "^0.16.17",
"typography-theme-wordpress-2016": "^0.15.10"
},
"devDependencies": {
"eslint": "^4.19.1",
"eslint-plugin-react": "^7.11.1",
"gh-pages": "^1.2.0",
"gulp": "^3.9.1",
"json-loader": "^0.5.7",
"prettier": "^1.14.2",
"shelljs": "^0.8.2"
},
"homepage": "https://github.com/gatsbyjs/gatsby-starter-blog#readme",
"keywords": [
"gatsby"
],
"license": "MIT",
"main": "n/a",
"repository": {
"type": "git",
"url": "git+https://github.com/gatsbyjs/gatsby-starter-blog.git"
},
"scripts": {
"dev": "gatsby develop",
"lint": "./node_modules/.bin/eslint --ext .js,.jsx --ignore-pattern public .",
"test": "echo \"Error: no test specified\" && exit 1",
"format": "prettier --trailing-comma es5 --no-semi --single-quote --write 'src/**/*.js' 'src/**/*.md'",
"develop": "gatsby develop",
"build": "gatsby build",
"deploy": "gatsby build --prefix-paths && gh-pages -d public",
"fix-semi": "eslint --quiet --ignore-pattern node_modules --ignore-pattern public --parser babel-eslint --no-eslintrc --rule '{\"semi\": [2, \"never\"], \"no-extra-semi\": [2]}' --fix gatsby-node.js"
}
}
gatsby-node.js:
const _ = require('lodash')
const Promise = require('bluebird')
const path = require('path')
const { createFilePath } = require('gatsby-source-filesystem')
var fs = require('fs');
var shelljs = require('shelljs');
var createTab = require('./build/tab.js').createTab;
var common = require('./build/common.js');
var gcsecx = null;
exports.onCreateNode = ({ node, actions, getNode }) => {
const { createNodeField } = actions
let slug;
if (node.internal.type === `MarkdownRemark`) {
const value = createFilePath({ node, getNode })
const fileNode = getNode(node.parent);
const parsedFilePath = path.parse(fileNode.relativePath);
if (node.internal.content) {
node.internal.content = createTab(node);
}
if (parsedFilePath.name !== "index" && parsedFilePath.dir !== "") {
slug = `/${parsedFilePath.dir}/${parsedFilePath.name}/`;
} else if (parsedFilePath.dir === "") {
slug = `/${parsedFilePath.name}/`;
} else {
slug = `/${parsedFilePath.dir}/`;
}
if (!gcsecx) {
gcsecx = common.getGcse();
}
createNodeField({ node, name: "parentPath", value: parsedFilePath.dir });
createNodeField({ node, name: "childPath", value: parsedFilePath.name });
createNodeField({ node, name: "slug", value: slug });
createNodeField({ node, name: "gcse", value: gcsecx });
createNodeField({ node, name: "header", value: getHeader(node.rawMarkdownBody.match(/[#]+ [^\n]+/g)) });
}
}
exports.createPages = ({ graphql, actions }) => {
const { createPage } = actions
return new Promise((resolve, reject) => {
const layout = path.resolve('./src/templates/layout.js')
resolve(
graphql(
`
{
allMarkdownRemark {
edges {
node {
html
id
frontmatter {
title
description
component
}
fields {
slug
parentPath
childPath
gcse
header
}
}
}
}
}
`
).then(result => {
if (result.errors) {
console.log(result.errors)
reject(result.errors)
}
let chapters = [];
// Create blog posts pages.
const posts = result.data.allMarkdownRemark.edges;
for (var i = 0; i < posts.length; i++) {
if (!chapters.length) {
chapters = getChapters(posts);
}
var post = posts[i];
const previous = i === posts.length - 1 ? null : posts[i + 1].node;
const next = i === 0 ? null : posts[i - 1].node;
createPage({
path: post.node.fields.slug,
component: layout,
context: {
slug: post.node.fields.slug,
previous,
next,
chapters: chapters,
gcse: post.node.fields.gcse,
header: post.node.fields.header
},
})
}
})
)
})
}
/**
 * Builds the chapter tree used for navigation from the markdown query edges.
 *
 * Edges are grouped by `node.fields.parentPath`; each distinct parent path is
 * processed once, at the first edge that carries it. When that first edge
 * yields a non-empty parent name, one `{ id, child, name }` entry is pushed;
 * otherwise the ordered children are pushed directly into the tree.
 *
 * @param {Array<object>} chapters - Edges of the `allMarkdownRemark` query.
 * @returns {Array<object>} The flattened/nested chapter entries.
 */
let getChapters = (chapters) => {
  const data = require('./data');
  const tree = [];
  const seenParents = new Set();
  // Lookup used for chapters whose component is 'Common'; it has no entries.
  const commonNames = {};

  for (const chapter of chapters) {
    const parentPath = chapter.node.fields.parentPath;
    if (seenParents.has(parentPath)) {
      continue;
    }
    seenParents.add(parentPath);

    // All edges that share this parent path.
    const siblings = new data.DataManager(chapters).executeLocal(
      new data.Query().where('node.fields.parentPath', 'equal', parentPath)
    );

    const component = chapter.node.frontmatter.component;
    const parentId = component === 'Common' ? commonNames[parentPath] : component;
    const ordered = getChildOrder(parentPath, siblings);

    if (parentId && parentId.length) {
      tree.push({
        id: parentPath,
        child: ordered,
        name: parentId
      });
    } else {
      // No parent name: the children become top-level entries.
      for (const entry of ordered) {
        tree.push(entry);
      }
    }
  }

  return tree;
}
/**
 * Orders the children of one folder according to that folder's `summary.txt`.
 *
 * When `./src/pages/<parentPath>/summary.txt` does not exist, `childrens` is
 * returned unchanged (the same array). Otherwise each line of the summary is
 * read as a file name (an optional trailing ".md" is ignored) and a
 * `{ parentId, id, name }` entry is produced for each child whose `childPath`
 * equals that line, in summary order. Children whose `.md` file is missing on
 * disk are reported with console.log and skipped.
 *
 * Fixes over the earlier version:
 *  - the inner loop index is no longer an implicit global,
 *  - ".md" is matched literally (the old `/.md/g` also ate e.g. "amd"),
 *  - both CRLF and LF line endings are accepted,
 *  - file existence is checked once per child instead of once per
 *    (summary line x child) pair.
 *
 * NOTE(review): the createPages query in this file does not select
 * `frontmatter.nodeName`, so for those edges `name` falls back to
 * `getChildName(childPath)`.
 *
 * @param {string} parentPath - Folder path relative to src/pages ('' for the root).
 * @param {Array<object>} childrens - Query edges that live in that folder.
 * @returns {Array<object>} Ordered entries, or `childrens` itself when there is no summary file.
 */
let getChildOrder = (parentPath, childrens) => {
  const parent = parentPath.length ? '/' + parentPath : parentPath;
  const summaryPath = './src/pages' + parent + '/summary.txt';
  if (!fs.existsSync(summaryPath)) {
    return childrens;
  }

  // Index the children that really exist on disk by their childPath.
  const childrenByPath = new Map();
  for (const child of childrens) {
    const childPath = child.node.fields.childPath;
    const childFile = './src/pages' + parent + '/' + childPath + '.md';
    if (!fs.existsSync(childFile)) {
      console.log(childFile + ' not found');
      continue;
    }
    if (!childrenByPath.has(childPath)) {
      childrenByPath.set(childPath, []);
    }
    childrenByPath.get(childPath).push(child);
  }

  // One entry per summary line, without surrounding whitespace or a trailing ".md".
  const orders = fs.readFileSync(summaryPath, 'utf8')
    .split(/\r?\n/)
    .map((line) => line.trim().replace(/\.md$/, ''));

  const orderedList = [];
  for (const order of orders) {
    for (const child of childrenByPath.get(order) || []) {
      orderedList.push({
        parentId: parentPath,
        id: parentPath + '/' + order,
        name: child.node.frontmatter.nodeName || getChildName(order)
      });
    }
  }
  return orderedList;
}
/**
 * Turns a hyphenated file name into a display name by upper-casing the first
 * character of every hyphen-separated part and joining the parts without a
 * separator, e.g. "getting-started" -> "GettingStarted".
 *
 * @param {string} childName - File name without extension.
 * @returns {string} The joined, capitalised name.
 */
let getChildName = (childName) => {
  const capitalise = (part) => part.charAt(0).toUpperCase() + part.slice(1);
  return childName
    .split('-')
    .map((part) => capitalise(part))
    .join('');
}
/**
 * Builds the "Contents" table-of-contents HTML from markdown heading lines.
 *
 * Headings of level 2 and deeper each produce one
 * `<li class="doc-anchor-hN"><a href="#id">text</a></li>` item; level-1
 * headings are skipped. The anchor id is the lower-cased heading text with
 * runs of spaces replaced by "-".
 *
 * The heading level is taken from the leading run of '#' only. Previously
 * every '#' in the line was counted, so "## C# basics" was treated as level 3,
 * and a line without any '#' threw.
 *
 * @param {Array<string>|null|undefined} headers - Raw heading lines such as "## Setup".
 * @returns {string} The TOC markup, or '' when there are no heading lines.
 */
let getHeader = (headers) => {
  if (!headers || !headers.length) {
    return '';
  }

  let toc = '<span>Contents</span>\n<ul>';
  for (const header of headers) {
    const leadingHashes = header.match(/^\s*(#+)/);
    const hLevel = leadingHashes ? leadingHashes[1].length : 0;
    if (hLevel > 1) {
      const hName = header.replace(/[#]+ /, '').trim();
      const hId = hName.toLowerCase().replace(/[ ]+/g, '-');
      toc += `\n <li class="doc-anchor-h${hLevel}"><a href="#${hId}">${hName}</a></li>`;
    }
  }
  toc += '\n</ul>';
  return toc;
}
```
success source and transform nodes — 126.943 s
success building schema — 4.738 s
⠁ createPages
<--- Last few GCs --->
[17700:000001A072EE6310] 1338918 ms: Mark-sweep 1373.9 (1570.6) -> 1373.9 (1570.6) MB, 1924.8 / 0.1 ms allocation failure GC in old space requested
[17700:000001A072EE6310] 1340517 ms: Mark-sweep 1373.9 (1570.6) -> 1373.9 (1549.1) MB, 1591.3 / 0.1 ms last resort GC in old space requested
[17700:000001A072EE6310] 1342166 ms: Mark-sweep 1373.9 (1549.1) -> 1373.9 (1536.1) MB, 1648.4 / 0.1 ms last resort GC in old space requested
<--- JS stacktrace --->
==== JS stack trace =========================================
Security context: 0000017216425879
1: /* anonymous */ [D:\Important\apps\docs\node_modules\lodash\defaults.js:~33] [pc=000003B2EEFC8D81](this=00000297B2175861
```
Hey! Sorry you ran into trouble. Can you share your site possibly with us?
The markdown benchmark site queries markdown here: https://github.com/gatsbyjs/gatsby/blob/master/benchmarks/markdown/src/templates/blank.js
/cc @DSchau
Almost certain this will be the same solution re: caching. I think the cache that gets loaded (in memory) is too large, so disabling the cache is _a_ solution, just don't think it's the best one.
See #8435 for what I think is a better solution, just not sure it's ready for primetime.
@ajithr would you be able to check out that branch, and see if you can pull it into your local and test against it (see contributing to Gatsby for more info)? I can help out if you're interested!
@DSchau @KyleAMathews Thanks for the quick response. I have tested with the PR you pointed to. Sadly, I am still hitting the same memory issue in the createPages hook on my end. I have created a simple application that reproduces it; you can find it at the link below.
https://github.com/ajithr/gatsby-starter-blog
Can you let me know whether any plugin or config is the cause of the memory issue, so that I can leave it out and continue to use Gatsby? You guys did a great job and I am very keen to use Gatsby.
Thanks again :-)
@ajithr thanks for the repo, that's very helpful. I was previously using gatsby-bench which does not manifest the issue after pulling in that PR.
I'll keep working on it, and try to get something set up. I think the "best" course of action might be some mechanism for entirely disabling the cache, but I don't love that, and will keep trying something new. Basically I think JSON stringify is a bottleneck, so stringifying (even to a file like I am in that PR) is still too slow and exhausts memory pretty quickly :/
Basically I think JSON stringify is a bottleneck
Could we test that by adding a simple PR which only writes out the cache once, once the bootstrap finishes?
@KyleAMathews you mean augmenting gatsby-transformer-remark to basically gather its data and cache in onPostBootstrap rather than in onCreateNode right?
I'll do that in a bit.
@DSchau Thanks for the update. Is it possible to disable the cache? If so, are there any disadvantages to using Gatsby without the cache, and does it fix the memory issue?
For now, I can use Gatsby without the cache to work around this problem, since I need it for a CI process and we won't repeatedly build the same source in the same location, i.e. it is compiled only once per build. I can enable the cache again after you have resolved the issue in the Gatsby source.
@ajithr we've talked about it. I'm thinking some type of default cache, which can be swapped/disabled (e.g. with an fs cache, redis cache, etc.) might be really advantageous here, so I'll keep pursuing that.
@ajithr with the latest changes in the PR I mentioned earlier, I was able to build out your site. Want to check it again?

@DSchau Thanks for your response. I have checked the latest changes with my original project. Unfortunately, I am still facing the same issue. I tested your changes in both Windows and Linux environments, and both failed with the memory issue :-(
It works fine with the dummy markdown project I provided, on both Windows and Linux, but it does not work with my actual project. I think the markdown page content is larger in my actual project, which affects performance. We also use images inside the markdown with the sharp plugin; I guess that affects it as well.
@DSchau @KyleAMathews I have tried to resolve the memory issue on my end, but it is still not working with the latest release. Now the error below is thrown on gatsby build.
D:\api-doc>gatsby build
success open and validate gatsby-config — 0.048 s
success load plugins — 1.015 s
success onPreInit — 12.344 s
success delete html and css files from previous builds — 0.105 s
success initialize cache — 0.019 s
success copy gatsby files — 0.492 s
success onPreBootstrap — 0.111 s
success source and transform nodes — 3.284 s
success building schema — 1.411 s
⠁ createPages
cp: no such file or directory: samples/
Sample Copied Successfully!!!
success createPages — 185.121 s
success createPagesStatefully — 3.379 s
success onPreExtractQueries — 0.000 s
success update schema — 35.067 s
success extract queries from components — 0.331 s
success run graphql queries — 334.212 s — 900/900 2.69 queries/second
success write out page data — 0.565 s
success write out redirect data — 0.376 s
success onPostBootstrap — 0.010 s
info bootstrap finished - 622.133 s
error Invalid string length
RangeError: Invalid string length
- JSON.stringify
- stringify.js:5 stringify
[api-doc]/[json-stringify-safe]/stringify.js:5:15
- index.js:90 saveState
[api-doc]/[gatsby]/dist/redux/index.js:90:23
- index.js:118 emitter.on
[api-doc]/[gatsby]/dist/redux/index.js:118:5
- mitt.js:1
[api-doc]/[mitt]/dist/mitt.js:1:268
- Array.map
- mitt.js:1 Object.emit
[api-doc]/[mitt]/dist/mitt.js:1:252
- index.js:430
[api-doc]/[gatsby]/dist/bootstrap/index.js:430:15
- Generator.next
- util.js:16 tryCatcher
[npm]/[gatsby-cli]/[bluebird]/js/release/util.js:16:23
- promise.js:512 Promise._settlePromiseFromHandler
[npm]/[gatsby-cli]/[bluebird]/js/release/promise.js:512:31
- promise.js:569 Promise._settlePromise
[npm]/[gatsby-cli]/[bluebird]/js/release/promise.js:569:18
- promise.js:614 Promise._settlePromise0
[npm]/[gatsby-cli]/[bluebird]/js/release/promise.js:614:10
- promise.js:694 Promise._settlePromises
[npm]/[gatsby-cli]/[bluebird]/js/release/promise.js:694:18
- async.js:138 _drainQueueStep
[npm]/[gatsby-cli]/[bluebird]/js/release/async.js:138:12
- async.js:131 _drainQueue
[npm]/[gatsby-cli]/[bluebird]/js/release/async.js:131:9
- async.js:147 Async._drainQueues
[npm]/[gatsby-cli]/[bluebird]/js/release/async.js:147:5
- async.js:17 Immediate.Async.drainQueues
[npm]/[gatsby-cli]/[bluebird]/js/release/async.js:17:14
Can you please help me resolve this issue so that I can run Gatsby in production?
I have one more question about Gatsby's production output. Is it possible to publish multiple small applications to the same location?
Suppose I split the large project into small sections, treat each section as an individual Gatsby application, and then publish all the sections to a single server location. Would that be possible? I don't know.
Example:
app1, app2 and app3 are folders that hold the markdown files. Each folder is treated as an application and gets its own production build. Finally, all the applications are hosted at a single location such as www.example.com/app/.
It may not work, but I am asking for your suggestion. Thanks :-)
@ajithr We had to increase `max_old_space_size` for the Node process doing the build so that it could build on Netlify. Our site had 4000+ markdown pages.
This is our npm script for doing so
Hope this helps
@Bouncey you still have to do that with Gatsby v2?
@KyleAMathews We never tried it without.
Old issues will be closed after 30 days of inactivity. This issue has been quiet for 20 days and is being marked as stale. Reply here or add the label "not stale" to keep this issue open!
Hey again!
It's been 30 days since anything happened on this issue, so our friendly neighborhood robot (that's me!) is going to close it.
Please keep in mind that I'm only a robot, so if I've closed this issue in error, I'm HUMAN_EMOTION_SORRY. Please feel free to reopen this issue or create a new one if you need anything else.
Thanks again for being part of the Gatsby community!
Most helpful comment
I have one more question about Gatsby's production output. Is it possible to publish multiple small applications to the same location?
Suppose I split the large project into small sections, treat each section as an individual Gatsby application, and then publish all the sections to a single server location. Would that be possible? I don't know.
Example:
app1, app2 and app3 are folders that hold the markdown files. Each folder is treated as an application and gets its own production build. Finally, all the applications are hosted at a single location such as www.example.com/app/.
It may not work, but I am asking for your suggestion. Thanks :-)