mirror of
https://github.com/FranP-code/Allnews.com.git
synced 2025-10-13 00:32:45 +00:00
a
This commit is contained in:
0
.gitignore
vendored
Executable file
0
.gitignore
vendored
Executable file
@@ -0,0 +1,43 @@
|
||||
/**
 * Clean-up script that runs inside a stored article page.
 *
 * Steps, in order:
 *   1. Inside the first "blob js-post-images-container" element: empty every
 *      'desvio-container', clear the inline padding-top of every
 *      'base-wrapper-image', and give each <img> without a src the URL held
 *      in its data-sf-src attribute.
 *   2. Remove the first 'article-links' element.
 *   3. Remove every <p> whose HTML contains "En Xataka |".
 *   4. Remove this script's own <script id="script-estructurator"> tag.
 *
 * Wrapped in an IIFE so no variables leak into the global scope, and every
 * lookup is guarded so that a missing element no longer aborts later steps.
 */
(function () {
    'use strict';

    const container = document.getElementsByClassName("blob js-post-images-container")[0];

    if (container) {
        const imageWrappers = container.getElementsByClassName('base-wrapper-image');
        console.log(imageWrappers);

        const ads = container.getElementsByClassName('desvio-container');
        console.log(ads);

        const images = container.querySelectorAll('img');
        console.log(images);

        for (const ad of Array.from(ads)) {
            ad.innerHTML = '';
        }

        for (const wrapper of Array.from(imageWrappers)) {
            wrapper.style.paddingTop = "";
        }

        for (const image of Array.from(images)) {
            // Only fill in a source when a data-sf-src value actually exists;
            // assigning undefined would turn src into the string "undefined".
            if (image.src === "" && image.dataset.sfSrc) {
                image.src = image.dataset.sfSrc;
            }
        }

        console.log(container);
    }

    const links = document.getElementsByClassName('article-links');
    if (links.length > 0) {
        links[0].remove();
    }

    // getElementsByTagName returns a live collection: removing an element
    // while indexing forward skips the next one. Iterate over a static copy.
    for (const paragraph of Array.from(document.getElementsByTagName('p'))) {
        if (paragraph.innerHTML.includes("En Xataka |")) {
            paragraph.remove();
        }
    }

    const ownScriptTag = document.getElementById('script-estructurator');
    if (ownScriptTag) {
        ownScriptTag.remove();
    }
})();
|
||||
@@ -18,7 +18,6 @@
|
||||
<div class="card-container">
|
||||
<?php
|
||||
|
||||
require 'mySQLconnect.php';
|
||||
require 'news_logic.php';
|
||||
|
||||
check_news();
|
||||
|
||||
0
news/00_ids_done.txt
Normal file
0
news/00_ids_done.txt
Normal file
@@ -6,41 +6,26 @@ ini_set('display_errors', 1);
|
||||
/**
 * Scan ./news/ and import every article file that has not been processed yet.
 *
 * The two bookkeeping files (00_news_done.txt and 00_ids_done.txt) are never
 * treated as articles. A file counts as processed when its path already
 * appears inside ./news/00_news_done.txt.
 *
 * @return void
 */
function check_news() {
    $done_list_path = './news/00_news_done.txt';
    $bookkeeping_files = [$done_list_path, './news/00_ids_done.txt'];

    // glob() returns false on error; fall back to an empty list so the loop is safe.
    $news = glob('./news/*') ?: [];

    foreach ($news as $news_unique) {
        // Skip the bookkeeping files and anything that is not a regular file
        // (a sub-directory cannot be read as an article).
        if (in_array($news_unique, $bookkeeping_files, true) || !is_file($news_unique)) {
            continue;
        }

        // Re-read on every pass: create_entry_in_DB() appends to this file.
        $news_done = is_file($done_list_path) ? (string) file_get_contents($done_list_path) : '';

        if (strpos($news_done, $news_unique) !== false) {
            continue;
        }

        $page = know_page($news_unique);
        $author = know_author($page, $news_unique);
        create_entry_in_DB($news_unique, $page, $author);
    }
}
|
||||
|
||||
function know_page($new) {
|
||||
switch ($new) {
|
||||
function know_page($news_unique) {
|
||||
switch ($news_unique) {
|
||||
case './news/xataka.html'|| './news/Xataka.html':
|
||||
return 'Xataka';
|
||||
break;
|
||||
@@ -51,8 +36,8 @@ function know_page($new) {
|
||||
}
|
||||
}
|
||||
|
||||
function know_author($page, $new) {
|
||||
$content = file_get_contents($new);
|
||||
function know_author($page, $news_unique) {
|
||||
$content = file_get_contents($news_unique);
|
||||
|
||||
switch ($page) {
|
||||
case 'Xataka':
|
||||
@@ -77,16 +62,43 @@ function get_string_between($string, $start, $end){
|
||||
//! CREDITS: https://stackoverflow.com/questions/5696412/how-to-get-a-substring-between-two-strings-in-php
|
||||
}
|
||||
|
||||
/**
 * Store one downloaded article in the `noticias` table and mark its file as processed.
 *
 * After a successful insert the counter in ./news/00_ids_done.txt is
 * incremented, the article file is renamed to ./news/<counter>.html and that
 * path is appended to ./news/00_news_done.txt. Nothing is renamed or recorded
 * when the article cannot be read or the insert fails, so it is retried on the
 * next run.
 *
 * @param string $news_unique Path of the article HTML file.
 * @param string $page        Source site name; only 'Xataka' is handled.
 * @param string $author      Author name stored with the article.
 *
 * @return int|null 0 when the article was not stored, null otherwise.
 */
function create_entry_in_DB($news_unique, $page, $author) {
    // Required in function scope so that $mySQLconnect is visible here.
    // NOTE(review): presumably defines a PDO handle named $mySQLconnect - confirm.
    require 'mySQLconnect.php';

    $content = file_get_contents($news_unique);
    if ($content === false) {
        return 0;
    }

    switch ($page) {
        case 'Xataka':
            $title = get_string_between($content, '<h1><span>', '</span></h1>');
            $pre_icon = get_string_between($content, '<div class="base-wrapper-image" style="padding-top: 61.38%;">', '</div>');
            $icon = get_string_between($pre_icon, 'src=', ' ');
            // The stored HTML carries the clean-up script tag at its end.
            $inner_HTML = get_string_between($content, '<div class="article-content">', '<div class="article-content-outer">') . '<script id="script-estructurator" src="3lqzK81oyJW4C+q8OXEsRs7xuJco4Gz9ewZc993eBZwfxOqs3ToZOJ9KYmX5v0IEG83ds9TcRSvHyhztvNs9KyucmzRo7IxfonPGF+PFg99QZn3EOfTul3GeCApquf6/5WS70jg66hp3mYWfcpK5B5kbJWIF/NhXHUusw2jtsrw7MsZ0J3TzL0s/g9UZhj30/LtiHKDBL2nWtFVCo/MiOZcfRmMyFSi6QhJnoi7Ri5GcVHym6tCAUGXiPaAWEmikxfosgrUDyjUp4hCdos9jFEQO+G7DE50h3dKWIEKlrVPaDbygJA9d47TEvcSq7FTD1f3PnTeibUV+VBIi4ZgRpHrlk45FBUKvdxeGquoAvApW3734L0.js"></script>';
            $frist_p = strip_tags(get_string_between($inner_HTML, '<p>', '</p>'));
            break;

        default:
            echo 0;
            return 0;
    }

    $insert_news = $mySQLconnect->prepare('insert into noticias (title, content, icon_route, page, author, frist_paragraph) values (?, ?, ?, ?, ?, ?)');

    // Values passed to execute() are bound as strings, matching PDO::PARAM_STR.
    if ($insert_news === false || !$insert_news->execute([$title, $inner_HTML, $icon, $page, $author, $frist_p])) {
        return 0;
    }

    // The counter file may be empty or missing; treat that as 0 instead of
    // doing arithmetic on a non-numeric string.
    $ids_path = './news/00_ids_done.txt';
    $ids_done = is_file($ids_path) ? (int) trim((string) file_get_contents($ids_path)) : 0;
    $num = $ids_done + 1;
    file_put_contents($ids_path, (string) $num);

    // If the rename fails, record the original path so the article is not
    // inserted a second time on the next run.
    $final_path = "./news/$num.html";
    if (!rename($news_unique, $final_path)) {
        $final_path = $news_unique;
    }

    // One path per line keeps the done list readable.
    file_put_contents('./news/00_news_done.txt', $final_path . "\n", FILE_APPEND);
}
|
||||
|
||||
|
||||
?>
|
||||
Reference in New Issue
Block a user