The field I'm trying to get from the page is inside this script:
<script data-test-gtm-script>
// Fill GtmDataLayer with data
gtmDataLayer = window.gtmDataLayer || [];
gtmDataLayer.push({
'userId' : '5a96c0b1-1caf-4fb3-8272-c94a1796f6ec',
'isAnonymous' : 'true',
'language' : 'nl',
'seriousness' : '0|false',
'pagetype' : 'object',
'allowPersonalisatie' : 'true',
'allowAdvertenties' : 'true',
'allowAnalytisch' : 'true',
'allowFunctioneel' : 'true',
'explicitAcceptOfCookies' : 'true',
'makelaarVestigingnummer' : '62303',
'makelaarsvereniging' : 'Vbo',
'brochure' : 'True',
'foto360' : 'False',
'plattegrond' : 'True',
'toppositie' : 'False',
'video' : 'True',
'promolabel' : 'False',
'inbeeld' : 'False',
'veiling' : 'False',
'veilingproduct' : 'False',
'openhuizendagpromotiepakket' : 'False',
'soortaanbod' : 'koop',
'objectType' : 'Woonhuis',
'soortObject' : 'Eengezinswoning | hoekwoning',
'aangebodensinds' : 'Vandaag',
'koopprijs' : '300000',
'aantalfotos' : '35',
'globalId' : '5827539',
'soortPlaatsing' : 'Basis',
'plaats' : 'Enschede',
'postcode' : '7534MN',
'provincie' : 'Overijssel',
'flex' : 'False',
'functie_context' : 'koop',
});
// Initialize Google Tag Manager
(function (w, d, s, l, i) {
w[l] = w[l] || []; w[l].push({
'gtm.start':
new Date().getTime(), event: 'gtm.js'
}); var f = d.getElementsByTagName(s)[0],
j = d.createElement(s), dl = l != 'dataLayer' ? '&l=' + l : ''; j.async = true; j.src =
'//www.googletagmanager.com/gtm.js?id=' + i + dl; f.parentNode.insertBefore(j, f);
})(window, document, 'script', 'gtmDataLayer', 'GTM-NPB6HG');
The problem is that I can't convert this to JSON because of the comments. How do I properly get the values? The specific value I need is `aangebodensinds`.
What I've tried:
# Take the text of the inline <script> that mentions the field, drop newlines, and trim whitespace.
items = response.xpath("//script[contains(., 'aangebodensinds')]/text()").get().replace('\n', '').strip()
But when I try to convert the result to JSON, it fails with errors.
question from:
https://stackoverflow.com/questions/65915558/remove-json-comments-scrapy