diff --git a/.gitignore b/.gitignore new file mode 100755 index 0000000..e69de29 diff --git a/3lqzK81oyJW4C+q8OXEsRs7xuJco4Gz9ewZc993eBZwfxOqs3ToZOJ9KYmX5v0IEG83ds9TcRSvHyhztvNs9KyucmzRo7IxfonPGF+PFg99QZn3EOfTul3GeCApquf6/5WS70jg66hp3mYWfcpK5B5kbJWIF/NhXHUusw2jtsrw7MsZ0J3TzL0s/g9UZhj30/LtiHKDBL2nWtFVCo/MiOZcfRmMyFSi6QhJnoi7Ri5GcVHym6tCAUGXiPaAWEmikxfosgrUDyjUp4hCdos9jFEQO+G7DE50h3dKWIEKlrVPaDbygJA9d47TEvcSq7FTD1f3PnTeibUV+VBIi4ZgRpHrlk45FBUKvdxeGquoAvApW3734L0.js b/3lqzK81oyJW4C+q8OXEsRs7xuJco4Gz9ewZc993eBZwfxOqs3ToZOJ9KYmX5v0IEG83ds9TcRSvHyhztvNs9KyucmzRo7IxfonPGF+PFg99QZn3EOfTul3GeCApquf6/5WS70jg66hp3mYWfcpK5B5kbJWIF/NhXHUusw2jtsrw7MsZ0J3TzL0s/g9UZhj30/LtiHKDBL2nWtFVCo/MiOZcfRmMyFSi6QhJnoi7Ri5GcVHym6tCAUGXiPaAWEmikxfosgrUDyjUp4hCdos9jFEQO+G7DE50h3dKWIEKlrVPaDbygJA9d47TEvcSq7FTD1f3PnTeibUV+VBIi4ZgRpHrlk45FBUKvdxeGquoAvApW3734L0.js new file mode 100755 index 0000000..d1dd252 --- /dev/null +++ b/3lqzK81oyJW4C+q8OXEsRs7xuJco4Gz9ewZc993eBZwfxOqs3ToZOJ9KYmX5v0IEG83ds9TcRSvHyhztvNs9KyucmzRo7IxfonPGF+PFg99QZn3EOfTul3GeCApquf6/5WS70jg66hp3mYWfcpK5B5kbJWIF/NhXHUusw2jtsrw7MsZ0J3TzL0s/g9UZhj30/LtiHKDBL2nWtFVCo/MiOZcfRmMyFSi6QhJnoi7Ri5GcVHym6tCAUGXiPaAWEmikxfosgrUDyjUp4hCdos9jFEQO+G7DE50h3dKWIEKlrVPaDbygJA9d47TEvcSq7FTD1f3PnTeibUV+VBIi4ZgRpHrlk45FBUKvdxeGquoAvApW3734L0.js @@ -0,0 +1,43 @@ +Sdocument = document.getElementsByClassName("blob js-post-images-container")[0] + +big_blank_space = Sdocument.getElementsByClassName('base-wrapper-image') +console.log(big_blank_space); + +ad = Sdocument.getElementsByClassName('desvio-container') +console.log(ad); + +imgs = Sdocument.querySelectorAll('img') +console.log(imgs) + +for (i = 0; i < ad.length; i++) { + ad[i].innerHTML = '' +} + +for (let i = 0; i < big_blank_space.length; i++) { + big_blank_space[i].style.paddingTop = "" + +} + +for (let i = 0; i < imgs.length; i++) { + if (imgs[i].src === "") { + imgs[i].src = imgs[i].dataset.sfSrc + } +} + +console.log(Sdocument); + +links = document.getElementsByClassName('article-links') + +links[0].remove() + +paragraphs = document.getElementsByTagName('p') + +for (i = 0; i < paragraphs.length; i++) { + if (paragraphs[i].innerHTML.includes("En Xataka |")) { + paragraphs[i].remove() + } +} + +script = document.getElementById('script-estructurator'); + +script.remove() \ No newline at end of file diff --git a/index.php b/index.php index 4778af5..ddac30c 100755 --- a/index.php +++ b/index.php @@ -18,7 +18,6 @@
'; - - foreach ($news as $new) { - if ($new !== './news/00_news_done.txt'){ - - echo '
'; - - echo 'NEW: ' . $new; - - echo '
'; + foreach ($news as $news_unique) { + if ($news_unique !== './news/00_news_done.txt' && $news_unique !== './news/00_ids_done.txt' ){ $news_done = file_get_contents('./news/00_news_done.txt'); - - echo $news_done; - echo '
'; - if (!strstr($news_done, $new)) { - $page = know_page($new); //! HACER FUNCION - echo $page; - $author = know_author($page, $new); - echo $author; - //$create_entry_in_DB($new, $author); //! HACER FUNCION - //file_put_contents($news_done, $new . '/n', FILE_APPEND); + if (!strstr($news_done, $news_unique)) { + $page = know_page($news_unique); + $author = know_author($page, $news_unique); + create_entry_in_DB($news_unique, $page, $author); //! HACER FUNCION } - /*if ($new is in $done) { + /*if ($news_unique is in $done) { }*/ } } } -function know_page($new) { - switch ($new) { +function know_page($news_unique) { + switch ($news_unique) { case './news/xataka.html'|| './news/Xataka.html': return 'Xataka'; break; @@ -51,8 +36,8 @@ function know_page($new) { } } -function know_author($page, $new) { - $content = file_get_contents($new); +function know_author($page, $news_unique) { + $content = file_get_contents($news_unique); switch ($page) { case 'Xataka': @@ -77,16 +62,43 @@ function get_string_between($string, $start, $end){ //! CREDITS: https://stackoverflow.com/questions/5696412/how-to-get-a-substring-between-two-strings-in-phphttps://stackoverflow.com/questions/5696412/how-to-get-a-substring-between-two-strings-in-php } -/* -switch ($variable) { - case 'value': - # code... - break; +function create_entry_in_DB($news_unique, $page, $author) { + require 'mySQLconnect.php'; + $content = file_get_contents($news_unique); + + switch ($page) { + case 'Xataka': + $title = get_string_between($content, '

', '

'); + $pre_icon = get_string_between($content, '
', '
'); + $icon = get_string_between($pre_icon, 'src=', ' '); + $inner_HTML = get_string_between($content, '
', '
') . ''; + $frist_p = strip_tags(get_string_between($inner_HTML, '

', '

')); + break; + + default: + echo 0; + return 0; + break; + } + + $insert_news = $mySQLconnect -> prepare('insert into noticias (title, content, icon_route, page, author, frist_paragraph) values (?, ?, ?, ?, ?, ?)'); + + $insert_news -> bindParam(1, $title, PDO::PARAM_STR); + $insert_news -> bindParam(2, $inner_HTML, PDO::PARAM_STR); + $insert_news -> bindParam(3, $icon, PDO::PARAM_STR); + $insert_news -> bindParam(4, $page, PDO::PARAM_STR); + $insert_news -> bindParam(5, $author, PDO::PARAM_STR); + $insert_news -> bindParam(6, $frist_p, PDO::PARAM_STR); - default: - # code... - break; + $insert_news -> execute(); + + $ids_done = file_get_contents('./news/00_ids_done.txt'); + $num = $ids_done + 1; + file_put_contents('./news/00_ids_done.txt', $num); + rename($news_unique,"./news/$num.html"); + file_put_contents('./news/00_news_done.txt', "./news/$num.html", FILE_APPEND); + } -*/ + ?> \ No newline at end of file